keras-hub-nightly 0.19.0.dev202503010353__py3-none-any.whl → 0.19.0.dev202503030351__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,9 @@ from keras_hub.src.models.sam.sam_prompt_encoder import SAMPromptEncoder
  from keras_hub.src.models.segformer.segformer_image_converter import (
      SegFormerImageConverter,
  )
+ from keras_hub.src.models.siglip.siglip_image_converter import (
+     SigLIPImageConverter,
+ )
  from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter
  from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter
  from keras_hub.src.models.whisper.whisper_audio_converter import (
@@ -312,6 +312,13 @@ from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor impor
  )
  from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
  from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
+ from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
+ from keras_hub.src.models.siglip.siglip_preprocessor import SigLIPPreprocessor
+ from keras_hub.src.models.siglip.siglip_text_encoder import SigLIPTextEncoder
+ from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
+ from keras_hub.src.models.siglip.siglip_vision_encoder import (
+     SigLIPVisionEncoder,
+ )
  from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
      StableDiffusion3Backbone,
  )
@@ -30,6 +30,7 @@ from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
  )
  from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer
  from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
+ from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
  from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer
  from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
  from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import (
@@ -1,6 +1,7 @@
  import math

  import keras
+ import ml_dtypes
  import numpy as np
  from keras import ops

@@ -18,6 +19,95 @@ from keras_hub.src.utils.tensor_utils import check_bounding_box_support
  from keras_hub.src.utils.tensor_utils import preprocessing_function


+ # TODO: Use `keras.layers.Resizing` once `antialias` is configurable.
+ # https://github.com/keras-team/keras/pull/20972
+ def _saturate_cast(x, dtype, backend_module):
+     def get_dtype_min_max(dtype):
+         if "bool" == dtype:
+             dtype_min = 0
+             dtype_max = 1
+         elif "int" in dtype:
+             dtype_min = ml_dtypes.iinfo(dtype).min
+             dtype_max = ml_dtypes.iinfo(dtype).max
+         else:
+             dtype_min = ml_dtypes.finfo(dtype).min
+             dtype_max = ml_dtypes.finfo(dtype).max
+         return dtype_min, dtype_max
+
+     dtype = keras.backend.standardize_dtype(dtype)
+     in_dtype = keras.backend.standardize_dtype(x.dtype)
+     in_min, in_max = get_dtype_min_max(in_dtype)
+     out_min, out_max = get_dtype_min_max(dtype)
+
+     min_limit = np.maximum(in_min, out_min).astype(in_dtype)
+     if min_limit < out_min:
+         min_limit = np.nextafter(min_limit, 0, dtype=in_dtype)
+     max_limit = np.minimum(in_max, out_max).astype(in_dtype)
+     if max_limit > out_max:
+         max_limit = np.nextafter(max_limit, 0, dtype=in_dtype)
+
+     x = backend_module.numpy.clip(x, min_limit, max_limit)
+     return backend_module.cast(x, dtype)
+
+
+ class ResizingAntialiasConfigurable(keras.layers.Resizing):
+     """A preprocessing layer which resizes images.
+
+     This class is the same as `keras.layers.Resizing` but exposes `antialias` as
+     a configurable parameter.
+     """
+
+     def __init__(
+         self,
+         height,
+         width,
+         interpolation="bilinear",
+         antialias=False,
+         crop_to_aspect_ratio=False,
+         pad_to_aspect_ratio=False,
+         fill_mode="constant",
+         fill_value=0.0,
+         data_format=None,
+         **kwargs,
+     ):
+         super().__init__(
+             height=height,
+             width=width,
+             interpolation=interpolation,
+             crop_to_aspect_ratio=crop_to_aspect_ratio,
+             pad_to_aspect_ratio=pad_to_aspect_ratio,
+             fill_mode=fill_mode,
+             fill_value=fill_value,
+             data_format=data_format,
+             **kwargs,
+         )
+         self.antialias = bool(antialias)
+
+     def transform_images(self, images, transformation=None, training=True):
+         size = (self.height, self.width)
+         resized = self.backend.image.resize(
+             images,
+             size=size,
+             interpolation=self.interpolation,
+             antialias=self.antialias,  # Added.
+             data_format=self.data_format,
+             crop_to_aspect_ratio=self.crop_to_aspect_ratio,
+             pad_to_aspect_ratio=self.pad_to_aspect_ratio,
+             fill_mode=self.fill_mode,
+             fill_value=self.fill_value,
+         )
+         if resized.dtype == images.dtype:
+             return resized
+         if keras.backend.is_int_dtype(images.dtype):
+             resized = self.backend.numpy.round(resized)
+         return _saturate_cast(resized, images.dtype, self.backend)
+
+     def get_config(self):
+         config = super().get_config()
+         config.update({"antialias": self.antialias})
+         return config
+
+
  @keras_hub_export("keras_hub.layers.ImageConverter")
  class ImageConverter(PreprocessingLayer):
      """Preprocess raw images into model ready inputs.
@@ -65,6 +155,8 @@ class ImageConverter(PreprocessingLayer):
          interpolation: String, the interpolation method.
              Supports `"bilinear"`, `"nearest"`, `"bicubic"`,
              `"lanczos3"`, `"lanczos5"`. Defaults to `"bilinear"`.
+         antialias: Whether to use an antialiasing filter when downsampling an
+             image. Defaults to `False`.
          bounding_box_format: A string specifying the format of the bounding
              boxes, one of `"xyxy"`, `"rel_xyxy"`, `"xywh"`, `"center_xywh"`,
              `"yxyx"`, `"rel_yxyx"`. Specifies the format of the bounding boxes
@@ -107,6 +199,7 @@ class ImageConverter(PreprocessingLayer):
          crop_to_aspect_ratio=True,
          pad_to_aspect_ratio=False,
          interpolation="bilinear",
+         antialias=False,
          bounding_box_format="yxyx",
          data_format=None,
          **kwargs,
@@ -132,12 +225,13 @@ class ImageConverter(PreprocessingLayer):
          resizing_kwargs = {}
          if check_bounding_box_support():
              resizing_kwargs["bounding_box_format"] = bounding_box_format
-         self.resizing = keras.layers.Resizing(
+         self.resizing = ResizingAntialiasConfigurable(
              height=image_size[0] if image_size else None,
              width=image_size[1] if image_size else None,
              crop_to_aspect_ratio=crop_to_aspect_ratio,
              pad_to_aspect_ratio=pad_to_aspect_ratio,
              interpolation=interpolation,
+             antialias=antialias,
              data_format=data_format,
              dtype=self.dtype_policy,
              name="resizing",
@@ -148,6 +242,7 @@ class ImageConverter(PreprocessingLayer):
          self.crop_to_aspect_ratio = crop_to_aspect_ratio
          self.pad_to_aspect_ratio = pad_to_aspect_ratio
          self.interpolation = interpolation
+         self.antialias = antialias
          self.bounding_box_format = bounding_box_format
          self.data_format = standardize_data_format(data_format)

@@ -211,6 +306,7 @@ class ImageConverter(PreprocessingLayer):
                "scale": self.scale,
                "offset": self.offset,
                "interpolation": self.interpolation,
+               "antialias": self.antialias,
                "crop_to_aspect_ratio": self.crop_to_aspect_ratio,
                "pad_to_aspect_ratio": self.pad_to_aspect_ratio,
                "bounding_box_format": self.bounding_box_format,
@@ -0,0 +1,5 @@
+ from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
+ from keras_hub.src.models.siglip.siglip_presets import backbone_presets
+ from keras_hub.src.utils.preset_utils import register_presets
+
+ register_presets(backbone_presets, SigLIPBackbone)
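This new module wires the SigLIP preset metadata to `SigLIPBackbone`. Assuming the usual keras-hub registry behavior, registration is what lets preset names resolve even from the base class, which dispatches to the registered subclass:

```python
import keras_hub

# Resolves through the preset registry and returns a `SigLIPBackbone`.
model = keras_hub.models.Backbone.from_preset("siglip_base_patch16_224")
```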
@@ -0,0 +1,230 @@
+ import keras
+ from keras import layers
+ from keras import ops
+
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.backbone import Backbone
+ from keras_hub.src.models.siglip.siglip_layers import SigLIPHead
+ from keras_hub.src.models.siglip.siglip_loss import SigLIPLoss
+
+
+ @keras_hub_export("keras_hub.models.SigLIPBackbone")
+ class SigLIPBackbone(Backbone):
+     """SigLIP core network with hyperparameters.
+
+     This backbone implements the base architecture for the Sigmoid loss in
+     the Language-Image Pre-training (SigLIP) model. Unlike standard
+     contrastive learning with softmax normalization, the sigmoid loss
+     operates solely on image-text pairs and does not require a global view
+     of the pairwise similarities for normalization. It includes vision and
+     text encoders. This backbone outputs the final logit scores
+     corresponding to each image and token input.
+
+     The default constructor gives a fully customizable, randomly
+     initialized SigLIP model with any number of layers, heads, and
+     embedding dimensions. To load preset architectures and weights, use
+     the `from_preset` constructor.
+
+     Args:
+         vision_encoder: The SigLIP vision encoder for encoding the input
+             images.
+         text_encoder: The SigLIP text encoder for encoding the input
+             tokens.
+         dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to
+             use for the model's computations and weights. Note that some
+             computations, such as softmax and layer normalization, will
+             always be done in float32 precision regardless of dtype.
+
+     Example:
+     ```python
+     input_data = {
+         "images": np.ones(shape=(1, 224, 224, 3), dtype="float32"),
+         "token_ids": np.ones(shape=(1, 64), dtype="int32"),
+     }
+
+     # Pretrained SigLIP model.
+     model = keras_hub.models.SigLIPBackbone.from_preset(
+         "siglip_base_patch16_224"
+     )
+     model(input_data)
+
+     # Randomly initialized SigLIP model with custom config.
+     vision_encoder = keras_hub.models.SigLIPVisionEncoder(
+         patch_size=32,
+         hidden_dim=768,
+         num_layers=8,
+         num_heads=8,
+         intermediate_dim=2048,
+         image_shape=(384, 384, 3),
+     )
+     text_encoder = keras_hub.models.SigLIPTextEncoder(
+         vocabulary_size=32000,
+         embedding_dim=768,
+         hidden_dim=768,
+         num_layers=8,
+         num_heads=8,
+         intermediate_dim=2048,
+     )
+     model = keras_hub.models.SigLIPBackbone(
+         vision_encoder=vision_encoder,
+         text_encoder=text_encoder,
+     )
+     model(input_data)
+     ```
+     """
+
+     def __init__(
+         self,
+         vision_encoder,
+         text_encoder,
+         dtype=None,
+         **kwargs,
+     ):
+         # === Layers ===
+         self.vision_encoder = vision_encoder
+         self.text_encoder = text_encoder
+         self.siglip_head = SigLIPHead(dtype=dtype, name="siglip_head")
+
+         # === Functional Model ===
+         image_input = layers.Input(
+             shape=self.vision_encoder.image_shape, name="images"
+         )
+         token_id_input = layers.Input(
+             shape=(None,), dtype="int32", name="token_ids"
+         )
+         vision_embeddings = self.get_vision_embeddings(image_input)
+         text_embeddings = self.get_text_embeddings(token_id_input)
+         vision_logits, text_logits = self.siglip_head(
+             vision_embeddings, text_embeddings
+         )
+
+         super().__init__(
+             inputs={
+                 "images": image_input,
+                 "token_ids": token_id_input,
+             },
+             outputs={
+                 "vision_logits": vision_logits,
+                 "text_logits": text_logits,
+             },
+             dtype=dtype,
+             **kwargs,
+         )
+
+     def compute_loss(
+         self, x, y=None, y_pred=None, sample_weight=None, **kwargs
+     ):
+         outputs = self(x)
+         text_logits = outputs["text_logits"]
+         batch_size = ops.shape(text_logits)[0]
+         eye = ops.eye(batch_size, dtype=text_logits.dtype)
+         # Targets are `1` for matching image-text pairs (the diagonal) and
+         # `-1` for all other pairs.
+         m1_diag1 = -ops.ones_like(text_logits) + 2 * eye
+         return super().compute_loss(
+             x=x,
+             y=m1_diag1,
+             y_pred=text_logits,
+             sample_weight=sample_weight,
+             **kwargs,
+         )
+
+     def compile(
+         self,
+         optimizer="auto",
+         loss="auto",
+         metrics=None,
+         **kwargs,
+     ):
+         """Configures the `SigLIPBackbone` model for training.
+
+         `SigLIPBackbone` extends the default compilation signature of
+         `keras.Model.compile` with defaults for `optimizer` and `loss`. To
+         override these defaults, pass any value to these arguments during
+         compilation.
+
+         Args:
+             optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer`
+                 instance. Defaults to `"auto"`, which uses the default
+                 optimizer for `SigLIPBackbone`. See `keras.Model.compile`
+                 and `keras.optimizers` for more info on possible
+                 `optimizer` values.
+             loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance.
+                 Defaults to `"auto"`, in which case the default loss
+                 computation of `SigLIPBackbone` will be applied. See
+                 `keras.Model.compile` and `keras.losses` for more info on
+                 possible `loss` values.
+             metrics: a list of metrics to be evaluated by the model during
+                 training and testing. Defaults to `None`. See
+                 `keras.Model.compile` and `keras.metrics` for more info on
+                 possible `metrics` values.
+             **kwargs: See `keras.Model.compile` for a full list of
+                 arguments supported by the compile method.
+         """
+         if optimizer == "auto":
+             # Use AdamW instead of the ScalingViT-Adafactor optimizer
+             # mentioned in the paper:
+             # https://arxiv.org/abs/2303.15343 - C. Robustness of SigLIP
+             # results.
+             optimizer = keras.optimizers.AdamW(1e-3, weight_decay=1e-4)
+         if loss == "auto":
+             loss = SigLIPLoss()
+         if metrics == "auto":
+             metrics = [keras.metrics.Accuracy()]
+         super().compile(
+             optimizer=optimizer,
+             loss=loss,
+             metrics=metrics,
+             **kwargs,
+         )
+
+     def get_vision_embeddings(self, images):
+         """Get the embeddings from the vision encoder.
+
+         Args:
+             images: The input tensor for the vision encoder.
+
+         Returns:
+             The output embeddings obtained by applying the projection
+             layer to the pooled output of the vision encoder.
+         """
+         return self.vision_encoder({"images": images})
+
+     def get_text_embeddings(self, token_ids):
+         """Get the embeddings from the text encoder.
+
+         Args:
+             token_ids: The input int tensor for the text encoder.
+
+         Returns:
+             The output embeddings obtained by applying the projection
+             layer to the pooled output of the text encoder.
+         """
+         return self.text_encoder({"token_ids": token_ids})
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "vision_encoder": layers.serialize(self.vision_encoder),
+                 "text_encoder": layers.serialize(self.text_encoder),
+             }
+         )
+         return config
+
+     @classmethod
+     def from_config(cls, config, custom_objects=None):
+         config = config.copy()
+
+         # Propagate `dtype` to submodels if needed.
+         if "dtype" in config and config["dtype"] is not None:
+             dtype_config = config["dtype"]
+             if "dtype" not in config["vision_encoder"]["config"]:
+                 config["vision_encoder"]["config"]["dtype"] = dtype_config
+             if "dtype" not in config["text_encoder"]["config"]:
+                 config["text_encoder"]["config"]["dtype"] = dtype_config
+
+         # We expect submodels to be instantiated.
+         config["vision_encoder"] = layers.deserialize(
+             config["vision_encoder"], custom_objects=custom_objects
+         )
+         config["text_encoder"] = layers.deserialize(
+             config["text_encoder"], custom_objects=custom_objects
+         )
+         return cls(**config)
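For context on `compute_loss` above: the target matrix `m1_diag1` is `+1` on the diagonal (matching image-text pairs) and `-1` everywhere else, which is the label layout of the paper's sigmoid loss, `L = -(1/|B|) * sum_ij log(sigmoid(z_ij * (t * x_i . y_j + b)))`. A NumPy sketch of that computation, under the assumption that `SigLIPLoss` follows the paper's Eq. 1 (this helper is illustrative, not the package's implementation):

```python
import numpy as np


def sigmoid_loss_np(logits):
    # `logits` is the (batch, batch) matrix of scaled pairwise
    # similarities t * (x_i . y_j) + b produced by the SigLIP head.
    n = logits.shape[0]
    z = -np.ones_like(logits) + 2.0 * np.eye(n)  # +1 diagonal, -1 elsewhere
    # -log(sigmoid(a)) == log(1 + exp(-a)), summed over all pairs and
    # averaged over the batch.
    return np.sum(np.log1p(np.exp(-z * logits))) / n


rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 4)).astype("float32")
print(sigmoid_loss_np(logits))
```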
@@ -0,0 +1,8 @@
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+ from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
+
+
+ @keras_hub_export("keras_hub.layers.SigLIPImageConverter")
+ class SigLIPImageConverter(ImageConverter):
+     backbone_cls = SigLIPBackbone
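Registering the converter against `SigLIPBackbone` ties SigLIP's image preprocessing to its checkpoints, so both can load from the same preset. A hedged sketch (preset name reused from the backbone docstring; the exact resize and rescale settings ship with the preset itself):

```python
import numpy as np

import keras_hub

# Loads the resize/rescale configuration stored with the preset.
converter = keras_hub.layers.ImageConverter.from_preset(
    "siglip_base_patch16_224"
)
images = np.random.uniform(0, 255, size=(1, 512, 512, 3)).astype("float32")
model_inputs = converter(images)  # Resized to the preset's image size.
```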