keras-hub-nightly 0.22.0.dev202508170419__py3-none-any.whl → 0.24.0.dev202511090424__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +15 -0
- keras_hub/models/__init__.py +93 -0
- keras_hub/src/layers/modeling/position_embedding.py +21 -6
- keras_hub/src/layers/modeling/reversible_embedding.py +8 -1
- keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
- keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
- keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
- keras_hub/src/models/backbone.py +28 -16
- keras_hub/src/models/causal_lm.py +37 -0
- keras_hub/src/models/causal_lm_preprocessor.py +14 -0
- keras_hub/src/models/clip/clip_presets.py +8 -8
- keras_hub/src/models/d_fine/__init__.py +5 -0
- keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
- keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
- keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
- keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
- keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
- keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
- keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
- keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
- keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
- keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/d_fine/d_fine_presets.py +155 -0
- keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +7 -2
- keras_hub/src/models/depth_anything/__init__.py +9 -0
- keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
- keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
- keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
- keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
- keras_hub/src/models/depth_anything/depth_anything_presets.py +41 -0
- keras_hub/src/models/depth_anything/interpolate.py +62 -0
- keras_hub/src/models/depth_estimator.py +239 -0
- keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
- keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
- keras_hub/src/models/dinov2/dinov2_layers.py +16 -4
- keras_hub/src/models/dinov3/__init__.py +5 -0
- keras_hub/src/models/dinov3/dinov3_backbone.py +263 -0
- keras_hub/src/models/dinov3/dinov3_image_converter.py +8 -0
- keras_hub/src/models/dinov3/dinov3_layers.py +1013 -0
- keras_hub/src/models/dinov3/dinov3_presets.py +4 -0
- keras_hub/src/models/gemma/gemma_backbone.py +0 -1
- keras_hub/src/models/gemma/gemma_presets.py +30 -0
- keras_hub/src/models/gemma3/gemma3_attention.py +48 -0
- keras_hub/src/models/gemma3/gemma3_backbone.py +4 -1
- keras_hub/src/models/gemma3/gemma3_decoder_block.py +12 -0
- keras_hub/src/models/gemma3/gemma3_presets.py +39 -0
- keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
- keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
- keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
- keras_hub/src/models/image_to_image.py +5 -0
- keras_hub/src/models/inpaint.py +5 -0
- keras_hub/src/models/mobilenetv5/__init__.py +9 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_attention.py +699 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_backbone.py +396 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_blocks.py +890 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py +436 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py +157 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py +10 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_layers.py +462 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py +15 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_utils.py +146 -0
- keras_hub/src/models/parseq/__init__.py +5 -0
- keras_hub/src/models/parseq/parseq_backbone.py +134 -0
- keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
- keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
- keras_hub/src/models/parseq/parseq_decoder.py +418 -0
- keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
- keras_hub/src/models/parseq/parseq_presets.py +15 -0
- keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
- keras_hub/src/models/qwen3_moe/__init__.py +5 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +30 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
- keras_hub/src/models/sam/sam_prompt_encoder.py +3 -1
- keras_hub/src/models/siglip/siglip_presets.py +15 -0
- keras_hub/src/models/smollm3/smollm3_backbone.py +211 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm.py +310 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py +84 -0
- keras_hub/src/models/smollm3/smollm3_layers.py +757 -0
- keras_hub/src/models/smollm3/smollm3_tokenizer.py +60 -0
- keras_hub/src/models/smollm3/smollm3_utils.py +56 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +3 -3
- keras_hub/src/models/t5gemma/__init__.py +5 -0
- keras_hub/src/models/t5gemma/t5gemma_attention.py +370 -0
- keras_hub/src/models/t5gemma/t5gemma_backbone.py +366 -0
- keras_hub/src/models/t5gemma/t5gemma_decoder.py +355 -0
- keras_hub/src/models/t5gemma/t5gemma_encoder.py +214 -0
- keras_hub/src/models/t5gemma/t5gemma_layers.py +118 -0
- keras_hub/src/models/t5gemma/t5gemma_presets.py +374 -0
- keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm.py +442 -0
- keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_preprocessor.py +216 -0
- keras_hub/src/models/t5gemma/t5gemma_tokenizer.py +84 -0
- keras_hub/src/models/text_to_image.py +5 -0
- keras_hub/src/samplers/beam_sampler.py +6 -6
- keras_hub/src/samplers/sampler.py +8 -6
- keras_hub/src/tests/test_case.py +40 -3
- keras_hub/src/tokenizers/tokenizer.py +15 -0
- keras_hub/src/utils/openvino_utils.py +141 -0
- keras_hub/src/utils/preset_utils.py +58 -2
- keras_hub/src/utils/tensor_utils.py +26 -2
- keras_hub/src/utils/timm/convert_mobilenetv5.py +321 -0
- keras_hub/src/utils/timm/preset_loader.py +8 -4
- keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
- keras_hub/src/utils/transformers/convert_dinov3.py +106 -0
- keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
- keras_hub/src/utils/transformers/convert_smollm3.py +139 -0
- keras_hub/src/utils/transformers/convert_t5gemma.py +229 -0
- keras_hub/src/utils/transformers/convert_vit.py +4 -1
- keras_hub/src/utils/transformers/export/gemma.py +49 -4
- keras_hub/src/utils/transformers/export/hf_exporter.py +71 -25
- keras_hub/src/utils/transformers/preset_loader.py +12 -0
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +15 -0
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/RECORD +126 -47
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/top_level.txt +0 -0
keras_hub/layers/__init__.py
CHANGED
@@ -75,6 +75,9 @@ from keras_hub.src.models.clip.clip_image_converter import (
 from keras_hub.src.models.cspnet.cspnet_image_converter import (
     CSPNetImageConverter as CSPNetImageConverter,
 )
+from keras_hub.src.models.d_fine.d_fine_image_converter import (
+    DFineImageConverter as DFineImageConverter,
+)
 from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import (
     DeepLabV3ImageConverter as DeepLabV3ImageConverter,
 )

@@ -84,9 +87,15 @@ from keras_hub.src.models.deit.deit_image_converter import (
 from keras_hub.src.models.densenet.densenet_image_converter import (
     DenseNetImageConverter as DenseNetImageConverter,
 )
+from keras_hub.src.models.depth_anything.depth_anything_image_converter import (
+    DepthAnythingImageConverter as DepthAnythingImageConverter,
+)
 from keras_hub.src.models.dinov2.dinov2_image_converter import (
     DINOV2ImageConverter as DINOV2ImageConverter,
 )
+from keras_hub.src.models.dinov3.dinov3_image_converter import (
+    DINOV3ImageConverter as DINOV3ImageConverter,
+)
 from keras_hub.src.models.efficientnet.efficientnet_image_converter import (
     EfficientNetImageConverter as EfficientNetImageConverter,
 )

@@ -102,12 +111,18 @@ from keras_hub.src.models.mit.mit_image_converter import (
 from keras_hub.src.models.mobilenet.mobilenet_image_converter import (
     MobileNetImageConverter as MobileNetImageConverter,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_converter import (
+    MobileNetV5ImageConverter as MobileNetV5ImageConverter,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_converter import (
     MoonshineAudioConverter as MoonshineAudioConverter,
 )
 from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import (
     PaliGemmaImageConverter as PaliGemmaImageConverter,
 )
+from keras_hub.src.models.parseq.parseq_image_converter import (
+    PARSeqImageConverter as PARSeqImageConverter,
+)
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter as ResNetImageConverter,
 )
keras_hub/models/__init__.py
CHANGED
@@ -108,6 +108,15 @@ from keras_hub.src.models.cspnet.cspnet_image_classifier import (
 from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import (
     CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.d_fine.d_fine_backbone import (
+    DFineBackbone as DFineBackbone,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector import (
+    DFineObjectDetector as DFineObjectDetector,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import (
+    DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
     DebertaV3Backbone as DebertaV3Backbone,
 )

@@ -157,9 +166,27 @@ from keras_hub.src.models.densenet.densenet_image_classifier import (
 from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
     DenseNetImageClassifierPreprocessor as DenseNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.depth_anything.depth_anything_backbone import (
+    DepthAnythingBackbone as DepthAnythingBackbone,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator import (
+    DepthAnythingDepthEstimator as DepthAnythingDepthEstimator,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator_preprocessor import (
+    DepthAnythingDepthEstimatorPreprocessor as DepthAnythingDepthEstimatorPreprocessor,
+)
+from keras_hub.src.models.depth_estimator import (
+    DepthEstimator as DepthEstimator,
+)
+from keras_hub.src.models.depth_estimator_preprocessor import (
+    DepthEstimatorPreprocessor as DepthEstimatorPreprocessor,
+)
 from keras_hub.src.models.dinov2.dinov2_backbone import (
     DINOV2Backbone as DINOV2Backbone,
 )
+from keras_hub.src.models.dinov3.dinov3_backbone import (
+    DINOV3Backbone as DINOV3Backbone,
+)
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     DistilBertBackbone as DistilBertBackbone,
 )

@@ -404,6 +431,15 @@ from keras_hub.src.models.mobilenet.mobilenet_image_classifier import (
 from keras_hub.src.models.mobilenet.mobilenet_image_classifier_preprocessor import (
     MobileNetImageClassifierPreprocessor as MobileNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone as MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier import (
+    MobileNetV5ImageClassifier as MobileNetV5ImageClassifier,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier_preprocessor import (
+    MobileNetV5ImageClassifierPreprocessor as MobileNetV5ImageClassifierPreprocessor,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_to_text import (
     MoonshineAudioToText as MoonshineAudioToText,
 )

@@ -446,6 +482,18 @@ from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import (
 from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
     PaliGemmaTokenizer as PaliGemmaTokenizer,
 )
+from keras_hub.src.models.parseq.parseq_backbone import (
+    PARSeqBackbone as PARSeqBackbone,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm import (
+    PARSeqCausalLM as PARSeqCausalLM,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import (
+    PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor,
+)
+from keras_hub.src.models.parseq.parseq_tokenizer import (
+    PARSeqTokenizer as PARSeqTokenizer,
+)
 from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone
 from keras_hub.src.models.phi3.phi3_causal_lm import (
     Phi3CausalLM as Phi3CausalLM,

@@ -491,6 +539,15 @@ from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
 from keras_hub.src.models.qwen3.qwen3_tokenizer import (
     Qwen3Tokenizer as Qwen3Tokenizer,
 )
+from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import (
+    Qwen3MoeBackbone as Qwen3MoeBackbone,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import (
+    Qwen3MoeCausalLM as Qwen3MoeCausalLM,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor import (
+    Qwen3MoeCausalLMPreprocessor as Qwen3MoeCausalLMPreprocessor,
+)
 from keras_hub.src.models.qwen_moe.qwen_moe_backbone import (
     QwenMoeBackbone as QwenMoeBackbone,
 )

@@ -595,6 +652,30 @@ from keras_hub.src.models.siglip.siglip_tokenizer import (
 from keras_hub.src.models.siglip.siglip_vision_encoder import (
     SigLIPVisionEncoder as SigLIPVisionEncoder,
 )
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLM3Backbone,
+)
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLMBackbone,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLM3CausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLMCausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLM3CausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLMCausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLM3Tokenizer,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLMTokenizer,
+)
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
     StableDiffusion3Backbone as StableDiffusion3Backbone,
 )

@@ -615,6 +696,18 @@ from keras_hub.src.models.t5.t5_preprocessor import (
     T5Preprocessor as T5Preprocessor,
 )
 from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
+from keras_hub.src.models.t5gemma.t5gemma_backbone import (
+    T5GemmaBackbone as T5GemmaBackbone,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import (
+    T5GemmaSeq2SeqLM as T5GemmaSeq2SeqLM,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm_preprocessor import (
+    T5GemmaSeq2SeqLMPreprocessor as T5GemmaSeq2SeqLMPreprocessor,
+)
+from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
+    T5GemmaTokenizer as T5GemmaTokenizer,
+)
 from keras_hub.src.models.task import Task as Task
 from keras_hub.src.models.text_classifier import TextClassifier as Classifier
 from keras_hub.src.models.text_classifier import (
keras_hub/src/layers/modeling/position_embedding.py
CHANGED

@@ -31,6 +31,11 @@ class PositionEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the position embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:

@@ -91,18 +96,28 @@
         )
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         feature_length = shape[-1]
         sequence_length = shape[-2]
         # trim to match the length of the input sequence, which might be less
         # than the sequence_length of the layer.
         position_embeddings = ops.convert_to_tensor(self.position_embeddings)
-        position_embeddings = ops.slice(
-            position_embeddings,
-            (start_index, 0),
-            (sequence_length, feature_length),
-        )
+        if positions is None:
+            position_embeddings = ops.slice(
+                position_embeddings,
+                (start_index, 0),
+                (sequence_length, feature_length),
+            )
+        else:
+            # Take care of unbatched `positions`.
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
+            position_embeddings = ops.take(
+                position_embeddings, positions, axis=0
+            )
+
         return ops.broadcast_to(position_embeddings, shape)

     def compute_output_shape(self, input_shape):
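For context, a minimal usage sketch of the new `positions` argument on `keras_hub.layers.PositionEmbedding`; the layer size, input shapes, and position values below are illustrative only and not taken from the diff:

import numpy as np
import keras_hub

layer = keras_hub.layers.PositionEmbedding(sequence_length=10)
inputs = np.random.rand(2, 5, 8)  # (batch, sequence, features)

# Default behavior: contiguous positions starting at `start_index`.
default_embeddings = layer(inputs)

# Custom, non-contiguous positions; `start_index` is ignored on this path.
custom_positions = np.array([0, 2, 4, 6, 8])
custom_embeddings = layer(inputs, positions=custom_positions)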
keras_hub/src/layers/modeling/reversible_embedding.py
CHANGED

@@ -235,7 +235,8 @@ class ReversibleEmbedding(keras.layers.Embedding):

         return super()._int8_call(inputs)

-    def quantize(self, mode, type_check=True):
+    def quantize(self, mode, type_check=True, config=None):
+        del config
         if type_check and type(self) is not ReversibleEmbedding:
             raise self._not_implemented_error(self.quantize)

@@ -244,6 +245,12 @@
                 inputs, axis=axis, to_numpy=True
             )

+        if mode != "int8":
+            raise NotImplementedError(
+                "Invalid quantization mode. Expected 'int8'. "
+                f"Received: quantization_mode={mode}"
+            )
+
         embeddings_shape = (self.input_dim, self.output_dim)
         if mode == "int8":
             embeddings, embeddings_scale = abs_max_quantize(
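A rough sketch of the int8 path this check now guards; the layer sizes and token ids are made up for illustration, and the reverse-projection call reflects the existing `reverse=True` behavior of the layer rather than anything introduced in this diff:

import numpy as np
import keras_hub

embedding = keras_hub.layers.ReversibleEmbedding(100, 16)
embedding(np.array([[1, 2, 3, 4]]))  # build the layer once
embedding.quantize("int8")  # the only mode this layer accepts

hidden = embedding(np.array([[1, 2, 3, 4]]))  # forward embedding lookup
logits = embedding(hidden, reverse=True)      # reverse projection to vocab logits
# Any other mode, e.g. embedding.quantize("float8"), now raises NotImplementedError.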
keras_hub/src/layers/modeling/rotary_embedding.py
CHANGED

@@ -37,6 +37,11 @@ class RotaryEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the rotary embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the rotary embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Examples:

@@ -76,6 +81,11 @@
         self.built = True

     def call(self, inputs, start_index=0, positions=None):
+        # Take care of unbatched `positions`.
+        if positions is not None:
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
         inputs = ops.moveaxis(
             inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
         )

@@ -103,6 +113,7 @@
         return positions + ops.cast(start_index, dtype="float32")

     def _compute_cos_sin_embedding(self, inputs, start_index=0, positions=None):
+        batch_axis = 0
         feature_axis = len(inputs.shape) - 1
         sequence_axis = 1

@@ -111,21 +122,20 @@

         if positions is None:
             positions = self._compute_positions(inputs, start_index)
+            positions = ops.expand_dims(positions, axis=batch_axis)
         else:
             positions = ops.cast(positions, "float32")
-
         positions = positions / ops.cast(self.scaling_factor, "float32")
-        freq = ops.einsum("i,j->ij", positions, inverse_freq)
+
+        freq = ops.einsum("bi,j->bij", positions, inverse_freq)
+
         embedding = ops.stack((freq, freq), axis=-2)
         embedding = ops.reshape(
             embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
         )

-        # Reshape the embedding to be broadcastable with input shape.
-        if feature_axis < sequence_axis:
-            embedding = ops.transpose(embedding)
         for axis in range(len(inputs.shape)):
-            if axis != sequence_axis and axis != feature_axis:
+            if axis not in (batch_axis, sequence_axis, feature_axis):
                 embedding = ops.expand_dims(embedding, axis)

         cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
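A short sketch of the `positions` override on `keras_hub.layers.RotaryEmbedding`; the tensor shape and position offsets are chosen purely for illustration:

import numpy as np
import keras_hub

rope = keras_hub.layers.RotaryEmbedding()
query = np.random.rand(1, 6, 4, 16)  # (batch, sequence, heads, head_dim)

rotated = rope(query)                  # positions 0..5 from start_index=0
shifted = rope(query, start_index=10)  # positions 10..15

# Explicit per-token positions; `start_index` is ignored here.
custom = rope(query, positions=np.array([3, 4, 5, 6, 7, 8]))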
keras_hub/src/layers/modeling/sine_position_encoding.py
CHANGED

@@ -30,6 +30,11 @@ class SinePositionEncoding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the encoding from. This is useful during cached decoding,
             where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:
     ```python

@@ -58,27 +63,35 @@
         self.max_wavelength = max_wavelength
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         seq_length = shape[-2]
         hidden_size = shape[-1]
-        positions = ops.arange(seq_length)
-        positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        if positions is None:
+            positions = ops.arange(seq_length)
+            positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        # Take care of unbatched `positions`.
+        if len(ops.shape(positions)) == 1:
+            positions = ops.expand_dims(positions, axis=0)
+
         min_freq = ops.cast(1 / self.max_wavelength, dtype=self.compute_dtype)
         timescales = ops.power(
             min_freq,
             ops.cast(2 * (ops.arange(hidden_size) // 2), self.compute_dtype)
             / ops.cast(hidden_size, self.compute_dtype),
         )
-        angles = ops.expand_dims(positions, 1) * ops.expand_dims(timescales, 0)
+        angles = ops.einsum("bi,j->bij", positions, timescales)
+
         # even indices are sine, odd are cosine
         cos_mask = ops.cast(ops.arange(hidden_size) % 2, self.compute_dtype)
         sin_mask = 1 - cos_mask
-        # embedding shape is [seq_length, hidden_size]
-        positional_encodings = (
-            ops.sin(angles) * sin_mask + ops.cos(angles) * cos_mask
-        )

+        # embedding shape is `[bsz (or 1), seq_length, hidden_size]`.
+        positional_encodings = ops.einsum(
+            "bij,j->bij", ops.sin(angles), sin_mask
+        ) + ops.einsum("bij,j->bij", ops.cos(angles), cos_mask)
         return ops.broadcast_to(positional_encodings, shape)

     def get_config(self):
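`SinePositionEncoding` gains the same argument, and `positions` may now be batched. A minimal sketch with illustrative shapes and values:

import numpy as np
import keras_hub

encoder = keras_hub.layers.SinePositionEncoding()
inputs = np.random.rand(2, 5, 8)  # (batch, sequence, features)

# One row of positions per example in the batch.
positions = np.array([[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]])
encodings = encoder(inputs, positions=positions)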
keras_hub/src/layers/modeling/token_and_position_embedding.py
CHANGED

@@ -120,11 +120,12 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         )
         return config

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         embedded_tokens = self.token_embedding(inputs)
         embedded_positions = self.position_embedding(
             embedded_tokens,
             start_index=start_index,
+            positions=positions,
         )
         outputs = embedded_tokens + embedded_positions
         return outputs
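`TokenAndPositionEmbedding` simply forwards the argument to its inner `PositionEmbedding`. A minimal sketch with made-up sizes:

import numpy as np
import keras_hub

embedding = keras_hub.layers.TokenAndPositionEmbedding(
    vocabulary_size=1000,
    sequence_length=8,
    embedding_dim=16,
)
token_ids = np.random.randint(0, 1000, size=(2, 8))

# `positions` is passed through to the underlying PositionEmbedding layer.
outputs = embedding(token_ids, positions=np.arange(8))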
keras_hub/src/models/backbone.py
CHANGED
@@ -91,21 +91,16 @@ class Backbone(keras.Model):
         }

         # Add quantization support by utilizing `DTypePolicyMap`
-        try:
-            if isinstance(
-                self.dtype_policy, keras.dtype_policies.DTypePolicyMap
-            ):
-                config.update({"dtype": self.dtype_policy})
-            else:
-                policy_map = keras.dtype_policies.DTypePolicyMap()
-                for layer in self._flatten_layers():
-                    if layer.quantization_mode is not None:
-                        policy_map[layer.path] = layer.dtype_policy
-                if len(policy_map) > 0:
-                    config.update({"dtype": policy_map})
-        # Before Keras 3.2, there is no `keras.dtype_policies.get`.
-        except AttributeError:
-            pass
+        dtype = self.dtype_policy
+        if not isinstance(dtype, keras.dtype_policies.DTypePolicyMap):
+            policy_map = keras.dtype_policies.DTypePolicyMap()
+            for layer in self._flatten_layers():
+                if layer.quantization_mode is not None:
+                    policy_map[layer.path] = layer.dtype_policy
+            if len(policy_map) > 0:
+                dtype = policy_map
+
+        config.update({"dtype": keras.dtype_policies.serialize(dtype)})
         return config

     @classmethod

@@ -135,7 +130,8 @@
     1. a built-in preset identifier like `'bert_base_en'`
     2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
     3. a Hugging Face handle like `'hf://user/bert_base_en'`
-    4. a path to a local preset directory like `'./bert_base_en'`
+    4. a ModelScope handle like `'modelscope://user/bert_base_en'`
+    5. a path to a local preset directory like `'./bert_base_en'`

     This constructor can be called in one of two ways. Either from the base
     class like `keras_hub.models.Backbone.from_preset()`, or from

@@ -277,3 +273,19 @@
             layer.lora_kernel_a.assign(lora_kernel_a)
             layer.lora_kernel_b.assign(lora_kernel_b)
         store.close()
+
+    def export_to_transformers(self, path):
+        """Export the backbone model to HuggingFace Transformers format.
+
+        This saves the backbone's configuration and weights in a format
+        compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_backbone,
+        )
+
+        export_backbone(self, path)
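A sketch of the new backbone export entry point. The Gemma preset name is only an example of an architecture the exporter handles (the diff also adds a Gemma export module); unsupported architectures raise a ValueError:

import keras_hub

backbone = keras_hub.models.GemmaBackbone.from_preset("gemma_2b_en")
backbone.export_to_transformers("./gemma_backbone_hf")
# The output directory can then be loaded from the transformers side,
# e.g. with AutoModel.from_pretrained("./gemma_backbone_hf").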
keras_hub/src/models/causal_lm.py
CHANGED

@@ -132,6 +132,17 @@ class CausalLM(Task):
             return self.generate_function

         self.generate_function = self.generate_step
+        if keras.config.backend() == "openvino":
+            from keras_hub.src.utils.openvino_utils import ov_infer
+
+            def wrapped_generate_function(inputs, stop_token_ids=None):
+                # Convert to numpy for OpenVINO backend
+                inputs = tree.map_structure(ops.array, inputs)
+                return ov_infer(
+                    self, inputs, stop_token_ids, self.generate_step
+                )
+
+            self.generate_function = wrapped_generate_function
         if keras.config.backend() == "torch":
             import torch

@@ -392,3 +403,29 @@
         outputs = [postprocess(x) for x in outputs]

         return self._normalize_generate_outputs(outputs, input_is_scalar)
+
+    def export_to_transformers(self, path):
+        """Export the full CausalLM model to HuggingFace Transformers format.
+
+        This exports the trainable model, tokenizer, and configurations in a
+        format compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        If the preprocessor is attached (default), both the trainable model and
+        tokenizer are exported. To export only the trainable model, set
+        `self.preprocessor = None` before calling this method, then export the
+        preprocessor separately via `preprocessor.export_to_transformers(path)`.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_to_safetensors,
+        )
+
+        export_to_safetensors(self, path)
+
+    def _post_quantize(self, mode, **kwargs):
+        super()._post_quantize(mode, **kwargs)
+        # Reset the compiled generate function.
+        self.generate_function = None
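And the task-level counterpart, following the docstring above; the preset name is again only illustrative:

import keras_hub

causal_lm = keras_hub.models.GemmaCausalLM.from_preset("gemma_2b_en")

# Default: exports the trainable model together with the tokenizer.
causal_lm.export_to_transformers("./gemma_hf")

# Model-only export, with the tokenizer written separately.
preprocessor = causal_lm.preprocessor
causal_lm.preprocessor = None
causal_lm.export_to_transformers("./gemma_hf_model_only")
preprocessor.export_to_transformers("./gemma_hf_tokenizer")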
keras_hub/src/models/causal_lm_preprocessor.py
CHANGED

@@ -180,3 +180,17 @@ class CausalLMPreprocessor(Preprocessor):
         self._sequence_length = value
         if self.packer is not None:
             self.packer.sequence_length = value
+
+    def export_to_transformers(self, path):
+        """Export the preprocessor to HuggingFace Transformers format.
+
+        Args:
+            path: str. Path to save the exported preprocessor/tokenizer.
+        """
+        if self.tokenizer is None:
+            raise ValueError("Preprocessor must have a tokenizer for export.")
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_tokenizer,
+        )
+
+        export_tokenizer(self.tokenizer, path)
keras_hub/src/models/clip/clip_presets.py
CHANGED

@@ -11,7 +11,7 @@ backbone_presets = {
             "params": 149620934,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/3",
     },
     "clip_vit_base_patch32": {
         "metadata": {

@@ -22,7 +22,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/3",
     },
     "clip_vit_large_patch14": {
         "metadata": {

@@ -33,7 +33,7 @@ backbone_presets = {
             "params": 427616770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/3",
     },
     "clip_vit_large_patch14_336": {
         "metadata": {

@@ -44,7 +44,7 @@ backbone_presets = {
             "params": 427944770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/3",
     },
     "clip_vit_b_32_laion2b_s34b_b79k": {
         "metadata": {

@@ -55,7 +55,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/3",
     },
     "clip_vit_h_14_laion2b_s32b_b79k": {
         "metadata": {

@@ -66,7 +66,7 @@ backbone_presets = {
             "params": 986109698,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/3",
     },
     "clip_vit_g_14_laion2b_s12b_b42k": {
         "metadata": {

@@ -77,7 +77,7 @@ backbone_presets = {
             "params": 1366678530,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/3",
     },
     "clip_vit_bigg_14_laion2b_39b_b160k": {
         "metadata": {

@@ -88,6 +88,6 @@ backbone_presets = {
             "params": 2539567362,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/3",
     },
 }