PyPI - keras-hub-nightly - Versions diffs - 0.19.0.dev202502190348__py3-none-any.whl → 0.19.0.dev202502210346__py3-none-any.whl - Mend

keras-hub-nightly 0.19.0.dev202502190348__py3-none-any.whl → 0.19.0.dev202502210346__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

keras_hub/src/models/backbone.py CHANGED Viewed

@@ -186,6 +186,14 @@ class Backbone(keras.Model):
         saver = get_preset_saver(preset_dir)
         saver.save_backbone(self)
+    def get_lora_target_names(self):
+        """Returns list of layer names which are to be LoRA-fied.
+        Subclasses can override this method if the names of layers to be
+        LoRa-fied are different.
+        """
+        return ["query_dense", "value_dense", "query", "value"]
     def enable_lora(self, rank):
         """Enable Lora on the backbone.
@@ -193,7 +201,8 @@ class Backbone(keras.Model):
         while enabling Lora on the query & value `EinsumDense` layers
         of the attention layers.
         """
-        target_names = ["query_dense", "value_dense", "query", "value"]
+        target_names = self.get_lora_target_names()
         self.trainable = True
         self._lora_enabled_layers = []
         self._lora_rank = rank

keras_hub/src/models/gemma/gemma_attention.py CHANGED Viewed

@@ -4,6 +4,7 @@ from keras import ops
 from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding
 from keras_hub.src.utils.keras_utils import clone_initializer
+from keras_hub.src.utils.keras_utils import has_flash_attention_support
 class CachedGemmaAttention(keras.layers.Layer):
@@ -117,6 +118,36 @@ class CachedGemmaAttention(keras.layers.Layer):
             query_normalization = 1 / np.sqrt(
                 self.hidden_dim // self.num_query_heads
             )
+        use_dot_product_attention = not (
+            self.dropout > 0.0 or (len(q.shape) != 4)
+        )
+        if has_flash_attention_support() and use_dot_product_attention:
+            if self.dropout > 0.0:
+                raise ValueError(
+                    "Flash attention does not support dropout. "
+                    "Please set `dropout` to 0.0."
+                )
+            if attention_mask is not None:
+                while len(attention_mask.shape) < 4:
+                    attention_mask = ops.expand_dims(
+                        attention_mask, axis=1
+                    )  # Add dimension for num_heads
+                if attention_mask.shape[1] != self.num_query_heads:
+                    attention_mask = ops.tile(
+                        attention_mask, [1, self.num_query_heads, 1, 1]
+                    )
+            attention_output = ops.dot_product_attention(
+                query=q,
+                key=k,
+                value=v,
+                bias=None,
+                mask=attention_mask,
+                scale=query_normalization,
+                is_causal=True,
+                flash_attention=True,
+            )
+            return attention_output
         q *= ops.cast(query_normalization, dtype=q.dtype)
         q_shape = ops.shape(q)
@@ -131,8 +162,8 @@ class CachedGemmaAttention(keras.layers.Layer):
         )
         b, q_len, _, _, h = ops.shape(q)
+        # Fallback to standard attention if flash attention is disabled
         attention_logits = ops.einsum("btkgh,bskh->bkgts", q, k)
         if self.logit_soft_cap is not None:
             attention_logits = ops.divide(attention_logits, self.logit_soft_cap)
             attention_logits = ops.multiply(

keras_hub/src/models/pali_gemma/pali_gemma_backbone.py CHANGED Viewed

@@ -274,6 +274,13 @@ class PaliGemmaBackbone(Backbone):
         # Keep the image_sequence_length as a backbone property for easy access.
         self.image_sequence_length = self.vit_encoder.image_sequence_length
+    def get_lora_target_names(self):
+        target_names = super().get_lora_target_names()
+        # Add these for `PaliGemmaVITAttention`.
+        target_names += ["query_proj", "value_proj"]
+        return target_names
     def get_config(self):
         config = super().get_config()
         config.update(

keras_hub/src/models/pali_gemma/pali_gemma_presets.py CHANGED Viewed

@@ -83,6 +83,96 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_ft_docci_10b_448/2",
     },
+    "pali_gemma2_mix_3b_224": {
+        "metadata": {
+            "description": (
+                "3 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 26-layer Gemma2 2B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 3032094960,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_3b_224/2",
+    },
+    "pali_gemma2_mix_3b_448": {
+        "metadata": {
+            "description": (
+                "3 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 26-layer Gemma2 2B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 3032979696,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_3b_448/2",
+    },
+    "pali_gemma2_mix_10b_224": {
+        "metadata": {
+            "description": (
+                "10 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 42-layer Gemma2 9B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 9662409456,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_224/2",
+    },
+    "pali_gemma2_mix_10b_448": {
+        "metadata": {
+            "description": (
+                "10 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 42-layer Gemma2 9B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 9663294192,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_448/2",
+    },
+    "pali_gemma2_mix_28b_224": {
+        "metadata": {
+            "description": (
+                "28 billion parameter, image size 224, 27-layer for "
+                "SigLIP-So400m vision encoder and 46-layer Gemma2 27B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 27650192112,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_224/2",
+    },
+    "pali_gemma2_mix_28b_448": {
+        "metadata": {
+            "description": (
+                "28 billion parameter, image size 448, 27-layer for "
+                "SigLIP-So400m vision encoder and 46-layer Gemma2 27B lanuage "
+                "model. This model has been fine-tuned on a wide range of "
+                "vision-language tasks and domains."
+            ),
+            "params": 27650192112,
+            "official_name": "PaliGemma2",
+            "path": "pali_gemma2",
+            "model_card": "https://www.kaggle.com/models/google/paligemma-2",
+        },
+        "kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_448/2",
+    },
     "pali_gemma2_pt_3b_224": {
         "metadata": {
             "description": (
@@ -181,7 +271,7 @@ backbone_presets = {
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9662409456,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",
@@ -196,7 +286,7 @@ backbone_presets = {
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9663294192,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",
@@ -211,7 +301,7 @@ backbone_presets = {
                 "model. This model has been pre-trained on a mixture of "
                 "datasets."
             ),
-            "params": 9666833136,
+            "params": 27650192112,
             "official_name": "PaliGemma2",
             "path": "pali_gemma2",
             "model_card": "https://www.kaggle.com/models/google/paligemma-2",

keras_hub/src/utils/keras_utils.py CHANGED Viewed

@@ -56,7 +56,19 @@ def standardize_data_format(data_format):
 def has_flash_attention_support():
-    if hasattr(keras.config, "is_flash_attention_enabled"):
+    if (
+        hasattr(keras.config, "is_flash_attention_enabled")
+        and keras.config.backend() == "jax"
+    ):
+        try:
+            from jax.nn import dot_product_attention as dot_product_attention
+        except ImportError:
+            logging.warning(
+                "Flash attention is not supported in your current JAX version. "
+                "Please update it by following the official guide: "
+                "https://jax.readthedocs.io/en/latest/installation.html"
+            )
+            return False
         return True
     else:
         return False

keras_hub/src/version_utils.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 # Unique source of truth for the version number.
-__version__ = "0.19.0.dev202502190348"
+__version__ = "0.19.0.dev202502210346"
 @keras_hub_export("keras_hub.version")

{keras_hub_nightly-0.19.0.dev202502190348.dist-info → keras_hub_nightly-0.19.0.dev202502210346.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: keras-hub-nightly
-Version: 0.19.0.dev202502190348
+Version: 0.19.0.dev202502210346
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-hub
 Author: Keras team
@@ -27,10 +27,11 @@ Requires-Dist: packaging
 Requires-Dist: regex
 Requires-Dist: rich
 Requires-Dist: kagglehub
-Requires-Dist: tensorflow-text
 Provides-Extra: extras
 Requires-Dist: rouge-score; extra == "extras"
 Requires-Dist: sentencepiece; extra == "extras"
+Provides-Extra: nlp
+Requires-Dist: tensorflow-text; extra == "nlp"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -147,6 +148,13 @@ To install the latest KerasHub release with Keras 3, simply run:
 pip install --upgrade keras-hub
 ```
+Our text tokenizers are based on TensorFlow Text. Hence, if you are using any
+model which has language as a modality, you will have to run:
+```
+pip install --upgrade keras-hub[nlp]
+```
 To install the latest nightly changes for both KerasHub and Keras, you can use
 our nightly package.

{keras_hub_nightly-0.19.0.dev202502190348.dist-info → keras_hub_nightly-0.19.0.dev202502210346.dist-info}/RECORD RENAMED Viewed

@@ -8,7 +8,7 @@ keras_hub/api/tokenizers/__init__.py,sha256=mtJgQy1spfQnPAkeLoeinsT_W9iCWHlJXwzc
 keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version_utils.py,sha256=aAL0M_iBZYGjtaDtwjeSo1Y9KpY-xoKEWzmooZygJ_c,222
+keras_hub/src/version_utils.py,sha256=ttkrKvEmHIzmFoB_r1Q4g722HgNujcQsmyjdwbeHz9E,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -44,7 +44,7 @@ keras_hub/src/metrics/rouge_base.py,sha256=Pt2DUznhTTeR-fX1nQ_wSbPtmuTgxQTvrGpu8
 keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKekQzQ,2729
 keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
 keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-keras_hub/src/models/backbone.py,sha256=lOv8id2qCkewrtBOrSObc3_nh_WOfsHsgGlIBsHug7g,10986
+keras_hub/src/models/backbone.py,sha256=ofIqRvSUrdP6rXAP0QTbStwiEfv-JxS7wTzcHxjj6iQ,11254
 keras_hub/src/models/causal_lm.py,sha256=ReaF-i3SHsCkHh4c28jM72QjMQ8x7yiCwG39FRb-7KE,16786
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
 keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
@@ -183,7 +183,7 @@ keras_hub/src/models/flux/flux_presets.py,sha256=z7C_FbI1_F5YETXuWpc7Yh_0w-5N0eB
 keras_hub/src/models/flux/flux_text_to_image.py,sha256=Rf5dD2EhG0bE8Gyg9sqaA8YEexS1kdraofIkxiZDjvc,4166
 keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=Fs9jr97QtmRUbRRz1kITpkuhDM2GoV3n0XSFC-qQA14,2252
 keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
-keras_hub/src/models/gemma/gemma_attention.py,sha256=1CVN5z9GKoU8TuNMih2_MweDkpd98xSqdic9F8xIBE8,8317
+keras_hub/src/models/gemma/gemma_attention.py,sha256=uvBDwIfv-pEo4IF2LY7vdt2R9W-OQIqOA0hLWVQUluI,9659
 keras_hub/src/models/gemma/gemma_backbone.py,sha256=GzAUSArw_pN9dtWQzTVhWDbW-XyWt4GyMcFLn9hwmh0,13391
 keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
 keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
@@ -250,12 +250,12 @@ keras_hub/src/models/opt/opt_causal_lm_preprocessor.py,sha256=xHfslVMOZlAIj2V2jI
 keras_hub/src/models/opt/opt_presets.py,sha256=LrjgI5gbq4Cvfl_pmeCnKn4hS_V_0GYTeJaDc9tbeZM,1745
 keras_hub/src/models/opt/opt_tokenizer.py,sha256=oDHeed4xf07tm14hj_C78BkzMuuRwRP2cRHmqYnObrs,2557
 keras_hub/src/models/pali_gemma/__init__.py,sha256=uODWTlttOOchcTLpiYHCEWMXnDxIz8ZVIeYFQN2bd8o,288
-keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=aRsLlgKqqxwtYxYy-D9k37YSJowUlRWfxpyRBFWDRnI,13413
+keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=_Sa22j4jk_7400h33S22w0S8Dh8Lzzl6A5WeEp55zSk,13637
 keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7J02JkXcanBgLSdwZwF56TVr8gc,11345
 keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
 keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=24ABQ1vGlppV-KfWh0YqJjzM_Lu2GIwvyJ4X2XXie_A,5616
 keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py,sha256=5yM_jUtrFsWIieiwfFBoP7mtPmQAwywkeLKbd7fhmzk,371
-keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=Ka1ChUUSKw-yY2th3QtmNtkeXt0krYfwhkHrScioMls,8979
+keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=zF04iShXky_c3IfUbmLlBN2FYb6iCWH1DWTgDdTCqrI,13006
 keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py,sha256=ljTiADHo0Ok88q-jVzwJIle2C8xcxnudLTsBLzIySaM,2415
 keras_hub/src/models/pali_gemma/pali_gemma_vit.py,sha256=R-W7SCnlLjkgiJ9vrn3ctbBES_yCxJSrCld5dV7nzaY,18235
 keras_hub/src/models/phi3/__init__.py,sha256=zIbf1MU-ks91mEkjTRJAsk51N3BBnXDF2JM1vO-13PQ,245
@@ -386,7 +386,7 @@ keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0Z
 keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
 keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
 keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-keras_hub/src/utils/keras_utils.py,sha256=ZULqIQylAQen-_pNC96htvLaxSJbfAenNoCo3ZSvY5g,1843
+keras_hub/src/utils/keras_utils.py,sha256=TNgp3ukTiCA0jrGUq2ZV_Xqtzc7CfiFQKyOH5t47z48,2313
 keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
 keras_hub/src/utils/preset_utils.py,sha256=ZbSEUSacKlr_mgHyB3ChUohgOQN7nMCkE6E2lGxt2HA,31927
 keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
@@ -413,7 +413,7 @@ keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYum
 keras_hub/src/utils/transformers/convert_vit.py,sha256=9SUZ9utNJhW_5cj3acMn9cRy47u2eIcDsrhmzj77o9k,5187
 keras_hub/src/utils/transformers/preset_loader.py,sha256=DgGJXbTSB9Na8FIR-YWWVqQPOFxHwWrGm41EwcS_EFs,3797
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
-keras_hub_nightly-0.19.0.dev202502190348.dist-info/METADATA,sha256=L0fEtVLfSiKpy7fJyO_VUrydFIaVT0Pirw7kPwu3ob8,7498
-keras_hub_nightly-0.19.0.dev202502190348.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-keras_hub_nightly-0.19.0.dev202502190348.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub_nightly-0.19.0.dev202502190348.dist-info/RECORD,,
+keras_hub_nightly-0.19.0.dev202502210346.dist-info/METADATA,sha256=SFwTUAZFRtgw028VYnTTxCexaXIPDHlfm7BdUqZPW4Q,7721
+keras_hub_nightly-0.19.0.dev202502210346.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+keras_hub_nightly-0.19.0.dev202502210346.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.19.0.dev202502210346.dist-info/RECORD,,

{keras_hub_nightly-0.19.0.dev202502190348.dist-info → keras_hub_nightly-0.19.0.dev202502210346.dist-info}/WHEEL RENAMED Viewed

File without changes

{keras_hub_nightly-0.19.0.dev202502190348.dist-info → keras_hub_nightly-0.19.0.dev202502210346.dist-info}/top_level.txt RENAMED Viewed

File without changes

keras-hub-nightly 0.19.0.dev202502190348__py3-none-any.whl → 0.19.0.dev202502210346__py3-none-any.whl

keras-hub-nightly 0.19.0.dev202502190348__py3-none-any.whl → 0.19.0.dev202502210346__py3-none-any.whl