PyPI - keras-hub - Versions diffs - 0.25.0__py3-none-any.whl → 0.25.1__py3-none-any.whl - Mend

keras-hub 0.25.0-py3-none-any.whl → 0.25.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

keras_hub/src/models/gemma3/gemma3_decoder_block.py CHANGED Viewed

@@ -251,6 +251,11 @@ class Gemma3DecoderBlock(keras.layers.Layer):
         cache_update_mask=None,
     ):
         # Note: `vision_mask` is used only for Gemma3.
+        # If float16, we clamp the input to avoid overflow.
+        is_float16 = keras.backend.standardize_dtype(x.dtype) == "float16"
+        if is_float16:
+            x = ops.clip(x, -65504, 65504)
         normalized_x = self.pre_attention_norm(x)
         attention_mask = self._compute_attention_mask(
             normalized_x, padding_mask, vision_mask, cache, cache_update_index
@@ -275,7 +280,15 @@ class Gemma3DecoderBlock(keras.layers.Layer):
         if self.dropout:
             attention = self.attention_dropout(attention)
-        attention_x = x + attention
+        if is_float16:
+            attention_x = ops.add(
+                ops.cast(x, "float32"), ops.cast(attention, "float32")
+            )
+            attention_x = ops.clip(attention_x, -65504, 65504)
+            attention_x = ops.cast(attention_x, "float16")
+        else:
+            attention_x = x + attention
         normalized_x = self.pre_ffw_norm(attention_x)
         x1 = self.gating_ffw(normalized_x)
@@ -286,7 +299,14 @@ class Gemma3DecoderBlock(keras.layers.Layer):
         if self.use_post_ffw_norm:
             x = self.post_ffw_norm(x)
-        x = x + attention_x
+        if is_float16:
+            x = ops.add(
+                ops.cast(x, "float32"), ops.cast(attention_x, "float32")
+            )
+            x = ops.clip(x, -65504, 65504)
+            x = ops.cast(x, "float16")
+        else:
+            x = x + attention_x
         if cache is not None:
             return x, new_cache

keras_hub/src/version.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 # Unique source of truth for the version number.
-__version__ = "0.25.0"
+__version__ = "0.25.1"
 @keras_hub_export("keras_hub.version")

{keras_hub-0.25.0.dist-info → keras_hub-0.25.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keras-hub
-Version: 0.25.0
+Version: 0.25.1
 Summary: Pretrained models for Keras.
 Author-email: Keras team <keras-users@googlegroups.com>
 License-Expression: Apache-2.0

{keras_hub-0.25.0.dist-info → keras_hub-0.25.1.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=-RPLKDEOnRJmHyB867IApKj98hBrhUIuGtO15xYKQxw,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=rshx76XoLXH7RCuRIo37RPFdGafttvDLFxgeDxJX22E,206
+keras_hub/src/version.py,sha256=WSC-QBbLh3MiIyJz2ADMs5o4B5gmc1jUEKu6olCr_hI,206
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -249,7 +249,7 @@ keras_hub/src/models/gemma3/gemma3_attention.py,sha256=u3RNI8dva5lzzqFNTAe9996s8
 keras_hub/src/models/gemma3/gemma3_backbone.py,sha256=HdWDRuF9MMwIzNVZEd1j53ILzptskvCxFiO__nfVQYU,16686
 keras_hub/src/models/gemma3/gemma3_causal_lm.py,sha256=U3C9TWlIz8VefAxQ0wJ6bDz18wqHBie8B26Ub_nFZs4,13843
 keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=_gvKPoXqNXpXcsfc8L29wW50MToHIr2D-4Q6MNVfBU0,29790
-keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=CYwYazqwakLNfhOLBl_8Q2TVZcMcOxMtiZtuVlk_hoo,11470
+keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=IBfi724Vwtq1vjuoShEEy-WpL8zyiUqeHwg1IVCSehU,12191
 keras_hub/src/models/gemma3/gemma3_image_converter.py,sha256=czi5JrTyKiK0nFzvonviBIX8jjvLHqvGNA9RyheB31k,536
 keras_hub/src/models/gemma3/gemma3_interleave_embeddings.py,sha256=CfYdudk5En9iU6vEnrcrEWIztloD1r8VzF2extqAhAM,4616
 keras_hub/src/models/gemma3/gemma3_presets.py,sha256=3jK1OyDKDdSG_lC7yh-8O5BMKJ61knbVrHQgKe0cJiQ,8209
@@ -638,7 +638,7 @@ keras_hub/src/utils/transformers/export/gemma.py,sha256=xX_vfQwvFZ_-lQX4kgMNOGKL
 keras_hub/src/utils/transformers/export/hf_exporter.py,sha256=Qk52c6LIA2eMHUNY9Vy4STJSpnhLMdJ_t-3ljqhSr4k,5081
 keras_hub/tokenizers/__init__.py,sha256=7squHiwAu3KU5rBiupi4pH0zpUg5BwRfAOu0JcJmfA4,4873
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub-0.25.0.dist-info/METADATA,sha256=XcP71H-itOfJNo_jV_WxUacUFlRLho27ZzLzltHwuns,7371
-keras_hub-0.25.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-keras_hub-0.25.0.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub-0.25.0.dist-info/RECORD,,
+keras_hub-0.25.1.dist-info/METADATA,sha256=uzKpDD4OSxVV5X4qPqBWXg11o6G11LhSs_z0F7t7woU,7371
+keras_hub-0.25.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+keras_hub-0.25.1.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub-0.25.1.dist-info/RECORD,,

{keras_hub-0.25.0.dist-info → keras_hub-0.25.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{keras_hub-0.25.0.dist-info → keras_hub-0.25.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

keras-hub 0.25.0__py3-none-any.whl → 0.25.1__py3-none-any.whl

keras-hub 0.25.0-py3-none-any.whl → 0.25.1-py3-none-any.whl