keras-hub-nightly 0.19.0.dev202501080345__py3-none-any.whl → 0.19.0.dev202501150344__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (25)
  1. keras_hub/src/metrics/bleu.py +3 -2
  2. keras_hub/src/models/basnet/basnet_backbone.py +1 -1
  3. keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py +3 -3
  4. keras_hub/src/models/densenet/densenet_backbone.py +3 -3
  5. keras_hub/src/models/flux/flux_text_to_image.py +1 -1
  6. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +2 -2
  7. keras_hub/src/models/resnet/resnet_backbone.py +1 -1
  8. keras_hub/src/models/retinanet/feature_pyramid.py +5 -5
  9. keras_hub/src/models/stable_diffusion_3/mmdit.py +254 -58
  10. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +15 -2
  11. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +1 -2
  12. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +12 -0
  13. keras_hub/src/models/vit/vit_layers.py +1 -1
  14. keras_hub/src/tokenizers/byte_tokenizer.py +1 -2
  15. keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +3 -0
  16. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +1 -2
  17. keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +3 -0
  18. keras_hub/src/utils/timm/convert_densenet.py +6 -4
  19. keras_hub/src/utils/timm/convert_efficientnet.py +1 -1
  20. keras_hub/src/utils/timm/convert_resnet.py +1 -1
  21. keras_hub/src/version_utils.py +1 -1
  22. {keras_hub_nightly-0.19.0.dev202501080345.dist-info → keras_hub_nightly-0.19.0.dev202501150344.dist-info}/METADATA +13 -2
  23. {keras_hub_nightly-0.19.0.dev202501080345.dist-info → keras_hub_nightly-0.19.0.dev202501150344.dist-info}/RECORD +25 -25
  24. {keras_hub_nightly-0.19.0.dev202501080345.dist-info → keras_hub_nightly-0.19.0.dev202501150344.dist-info}/WHEEL +1 -1
  25. {keras_hub_nightly-0.19.0.dev202501080345.dist-info → keras_hub_nightly-0.19.0.dev202501150344.dist-info}/top_level.txt +0 -0
@@ -329,8 +329,9 @@ class Bleu(keras.metrics.Metric):
                 return tf.squeeze(inputs, axis=-1)
             else:
                 raise ValueError(
-                    f"{tensor_name} must be of rank {base_rank}, {base_rank+1} "
-                    f"or {base_rank+2}. Found rank: {inputs.shape.rank}"
+                    f"{tensor_name} must be of rank {base_rank}, "
+                    f"{base_rank + 1}, or {base_rank + 2}. "
+                    f"Found rank: {inputs.shape.rank}"
                 )

         y_true = validate_and_fix_rank(y_true, "y_true", 1)
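For reference, a minimal sketch of how the reworded message renders (`tensor_name` and `base_rank` come from the surrounding `validate_and_fix_rank` helper; the found rank here is illustrative):

    tensor_name, base_rank = "y_true", 1
    message = (
        f"{tensor_name} must be of rank {base_rank}, "
        f"{base_rank + 1}, or {base_rank + 2}. "
        f"Found rank: {4}"
    )
    # -> "y_true must be of rank 1, 2, or 3. Found rank: 4"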
@@ -219,7 +219,7 @@ def get_resnet_block(_resnet, block_num):
     else:
         x = _resnet.pyramid_outputs[extractor_levels[block_num - 1]]
     y = _resnet.get_layer(
-        f"stack{block_num}_block{num_blocks[block_num]-1}_add"
+        f"stack{block_num}_block{num_blocks[block_num] - 1}_add"
     ).output
     return keras.models.Model(
         inputs=x,
@@ -88,13 +88,13 @@ class SpatialPyramidPooling(keras.layers.Layer):
                         dilation_rate=dilation_rate,
                         use_bias=False,
                         data_format=self.data_format,
-                        name=f"aspp_conv_{i+2}",
+                        name=f"aspp_conv_{i + 2}",
                     ),
                     keras.layers.BatchNormalization(
-                        axis=self.channel_axis, name=f"aspp_bn_{i+2}"
+                        axis=self.channel_axis, name=f"aspp_bn_{i + 2}"
                     ),
                     keras.layers.Activation(
-                        self.activation, name=f"aspp_activation_{i+2}"
+                        self.activation, name=f"aspp_activation_{i + 2}"
                     ),
                 ]
             )
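This hunk (and the similar ones below) looks like a formatter-driven cleanup: binary operators inside f-string replacement fields gain surrounding spaces. The rendered strings, and therefore the layer names, are unchanged; a quick check:

    i = 0
    # The old and new spellings format identically.
    assert f"aspp_conv_{i+2}" == f"aspp_conv_{i + 2}" == "aspp_conv_2"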
@@ -81,14 +81,14 @@ class DenseNetBackbone(FeaturePyramidBackbone):
                 channel_axis,
                 stackwise_num_repeats[stack_index],
                 growth_rate,
-                name=f"stack{stack_index+1}",
+                name=f"stack{stack_index + 1}",
             )
             pyramid_outputs[f"P{index}"] = x
             x = apply_transition_block(
                 x,
                 channel_axis,
                 compression_ratio,
-                name=f"transition{stack_index+1}",
+                name=f"transition{stack_index + 1}",
             )

         x = apply_dense_block(
@@ -140,7 +140,7 @@ def apply_dense_block(x, channel_axis, num_repeats, growth_rate, name=None):

     for i in range(num_repeats):
         x = apply_conv_block(
-            x, channel_axis, growth_rate, name=f"{name}_block{i+1}"
+            x, channel_axis, growth_rate, name=f"{name}_block{i + 1}"
         )
     return x

@@ -81,7 +81,7 @@ class FluxTextToImage(TextToImage):

     def fit(self, *args, **kwargs):
         raise NotImplementedError(
-            "Currently, `fit` is not supported for " "`FluxTextToImage`."
+            "Currently, `fit` is not supported for `FluxTextToImage`."
         )

     def generate_step(
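The change above merges two adjacent string literals that Python concatenates at compile time anyway, so the raised message is byte-for-byte identical:

    old = "Currently, `fit` is not supported for " "`FluxTextToImage`."
    new = "Currently, `fit` is not supported for `FluxTextToImage`."
    assert old == new  # implicit literal concatenation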
@@ -5,7 +5,7 @@ backbone_presets = {
     "pali_gemma_3b_mix_224": {
         "metadata": {
             "description": (
-                "image size 224, mix fine tuned, text sequence " "length is 256"
+                "image size 224, mix fine tuned, text sequence length is 256"
             ),
             "params": 2923335408,
             "path": "pali_gemma",
@@ -45,7 +45,7 @@ backbone_presets = {
     "pali_gemma_3b_896": {
         "metadata": {
             "description": (
-                "image size 896, pre trained, text sequence length " "is 512"
+                "image size 896, pre trained, text sequence length is 512"
             ),
             "params": 2927759088,
             "path": "pali_gemma",
@@ -177,7 +177,7 @@ class ResNetBackbone(FeaturePyramidBackbone):
                 use_bias=False,
                 padding="same",
                 dtype=dtype,
-                name=f"conv{conv_index+1}_conv",
+                name=f"conv{conv_index + 1}_conv",
             )(x)

             if not use_pre_activation:
@@ -209,9 +209,9 @@ class FeaturePyramid(keras.layers.Layer):
             )
             if i == backbone_max_level + 1 and self.use_p5:
                 self.output_conv_layers[level].build(
-                    (None, None, None, input_shapes[f"P{i-1}"][-1])
+                    (None, None, None, input_shapes[f"P{i - 1}"][-1])
                     if self.data_format == "channels_last"
-                    else (None, input_shapes[f"P{i-1}"][1], None, None)
+                    else (None, input_shapes[f"P{i - 1}"][1], None, None)
                 )
             else:
                 self.output_conv_layers[level].build(
@@ -277,7 +277,7 @@ class FeaturePyramid(keras.layers.Layer):
             if i < backbone_max_level:
                 # for the top most output, it doesn't need to merge with any
                 # upper stream outputs
-                upstream_output = self.top_down_op(output_features[f"P{i+1}"])
+                upstream_output = self.top_down_op(output_features[f"P{i + 1}"])
                 output = self.merge_op([output, upstream_output])
             output_features[level] = (
                 self.lateral_batch_norm_layers[level](output)
@@ -296,9 +296,9 @@ class FeaturePyramid(keras.layers.Layer):
         for i in range(backbone_max_level + 1, self.max_level + 1):
             level = f"P{i}"
             feats_in = (
-                inputs[f"P{i-1}"]
+                inputs[f"P{i - 1}"]
                 if i == backbone_max_level + 1 and self.use_p5
-                else output_features[f"P{i-1}"]
+                else output_features[f"P{i - 1}"]
             )
             if i > backbone_max_level + 1:
                 feats_in = self.activation(feats_in)
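As a reading aid for the `f"P{i - 1}"` lookups above, each coarser pyramid level is derived from the level below it; a small sketch with illustrative level numbers:

    backbone_max_level, max_level = 5, 7
    extra_levels = [
        f"P{i}" for i in range(backbone_max_level + 1, max_level + 1)
    ]
    # -> ["P6", "P7"]; P6 is computed from P5, P7 from P6.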
@@ -15,9 +15,8 @@ class AdaptiveLayerNormalization(layers.Layer):

     Args:
         embedding_dim: int. The size of each embedding vector.
-        residual_modulation: bool. Whether to output the modulation parameters
-            of the residual connection within the block of the diffusion
-            transformers. Defaults to `False`.
+        num_modulations: int. The number of the modulation parameters. The
+            available values are `2`, `6` and `9`. Defaults to `2`.
         **kwargs: other keyword arguments passed to `keras.layers.Layer`,
             including `name`, `dtype` etc.
@@ -28,11 +27,17 @@ class AdaptiveLayerNormalization(layers.Layer):
     https://arxiv.org/abs/2212.09748).
     """

-    def __init__(self, hidden_dim, residual_modulation=False, **kwargs):
+    def __init__(self, hidden_dim, num_modulations=2, **kwargs):
         super().__init__(**kwargs)
-        self.hidden_dim = int(hidden_dim)
-        self.residual_modulation = bool(residual_modulation)
-        num_modulations = 6 if self.residual_modulation else 2
+        hidden_dim = int(hidden_dim)
+        num_modulations = int(num_modulations)
+        if num_modulations not in (2, 6, 9):
+            raise ValueError(
+                "`num_modulations` must be `2`, `6` or `9`. "
+                f"Received: num_modulations={num_modulations}"
+            )
+        self.hidden_dim = hidden_dim
+        self.num_modulations = num_modulations

         self.silu = layers.Activation("silu", dtype=self.dtype_policy)
         self.dense = layers.Dense(
@@ -52,40 +57,84 @@ class AdaptiveLayerNormalization(layers.Layer):
         self.norm.build(inputs_shape)

     def call(self, inputs, embeddings, training=None):
-        x = inputs
+        hidden_states = inputs
         emb = self.dense(self.silu(embeddings), training=training)
-        if self.residual_modulation:
-            shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
-                ops.split(emb, 6, axis=1)
-            )
+        if self.num_modulations == 9:
+            (
+                shift_msa,
+                scale_msa,
+                gate_msa,
+                shift_mlp,
+                scale_mlp,
+                gate_mlp,
+                shift_msa2,
+                scale_msa2,
+                gate_msa2,
+            ) = ops.split(emb, self.num_modulations, axis=1)
+        elif self.num_modulations == 6:
+            (
+                shift_msa,
+                scale_msa,
+                gate_msa,
+                shift_mlp,
+                scale_mlp,
+                gate_mlp,
+            ) = ops.split(emb, self.num_modulations, axis=1)
         else:
-            shift_msa, scale_msa = ops.split(emb, 2, axis=1)
+            shift_msa, scale_msa = ops.split(emb, self.num_modulations, axis=1)
+
         scale_msa = ops.expand_dims(scale_msa, axis=1)
         shift_msa = ops.expand_dims(shift_msa, axis=1)
-        x = ops.add(
-            ops.multiply(
-                self.norm(x, training=training),
-                ops.add(1.0, scale_msa),
-            ),
-            shift_msa,
+        norm_hidden_states = ops.cast(
+            self.norm(hidden_states, training=training), scale_msa.dtype
+        )
+        hidden_states = ops.add(
+            ops.multiply(norm_hidden_states, ops.add(1.0, scale_msa)), shift_msa
         )
-        if self.residual_modulation:
-            return x, gate_msa, shift_mlp, scale_mlp, gate_mlp
+
+        if self.num_modulations == 9:
+            scale_msa2 = ops.expand_dims(scale_msa2, axis=1)
+            shift_msa2 = ops.expand_dims(shift_msa2, axis=1)
+            hidden_states2 = ops.add(
+                ops.multiply(norm_hidden_states, ops.add(1.0, scale_msa2)),
+                shift_msa2,
+            )
+            return (
+                hidden_states,
+                gate_msa,
+                shift_mlp,
+                scale_mlp,
+                gate_mlp,
+                hidden_states2,
+                gate_msa2,
+            )
+        elif self.num_modulations == 6:
+            return hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp
         else:
-            return x
+            return hidden_states

     def get_config(self):
         config = super().get_config()
         config.update(
             {
                 "hidden_dim": self.hidden_dim,
-                "residual_modulation": self.residual_modulation,
+                "num_modulations": self.num_modulations,
             }
         )
         return config

     def compute_output_shape(self, inputs_shape, embeddings_shape):
-        if self.residual_modulation:
+        if self.num_modulations == 9:
+            return (
+                inputs_shape,
+                embeddings_shape,
+                embeddings_shape,
+                embeddings_shape,
+                embeddings_shape,
+                inputs_shape,
+                embeddings_shape,
+            )
+        elif self.num_modulations == 6:
             return (
                 inputs_shape,
                 embeddings_shape,
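A minimal sketch of what `num_modulations` controls, using `keras.ops` directly (shapes are illustrative): the embedding projection is split into that many modulation tensors, 2 for shift/scale only, 6 to add attention and MLP gates, 9 to add the parameters for a second attention branch in dual-attention blocks.

    from keras import ops

    hidden_dim, num_modulations = 64, 9
    emb = ops.ones((1, num_modulations * hidden_dim))
    chunks = ops.split(emb, num_modulations, axis=1)
    assert len(chunks) == 9  # nine (1, 64) modulation tensors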
@@ -345,6 +394,27 @@ class TimestepEmbedding(layers.Layer):
         return output_shape


+def get_qk_norm(qk_norm=None, q_norm_name="q_norm", k_norm_name="k_norm"):
+    """Helper function to instantiate `LayerNormalization` layers."""
+    q_norm = None
+    k_norm = None
+    if qk_norm is None:
+        pass
+    elif qk_norm == "rms_norm":
+        q_norm = layers.LayerNormalization(
+            epsilon=1e-6, rms_scaling=True, dtype="float32", name=q_norm_name
+        )
+        k_norm = layers.LayerNormalization(
+            epsilon=1e-6, rms_scaling=True, dtype="float32", name=k_norm_name
+        )
+    else:
+        raise NotImplementedError(
+            "Supported `qk_norm` are `'rms_norm'` and `None`. "
+            f"Received: qk_norm={qk_norm}."
+        )
+    return q_norm, k_norm
+
+
 class DismantledBlock(layers.Layer):
     """A dismantled block used to compute pre- and post-attention.

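The helper centralizes logic that was previously duplicated inside `DismantledBlock` (where, as the hunk further down shows, the old code built `k_norm` with `name="q_norm"`). A usage sketch:

    q_norm, k_norm = get_qk_norm("rms_norm")  # two RMS-scaled LayerNorms
    q_norm, k_norm = get_qk_norm(None)        # (None, None): no QK norm
    # get_qk_norm("layer_norm") raises NotImplementedError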
@@ -356,6 +426,8 @@ class DismantledBlock(layers.Layer):
             the end of the block.
         qk_norm: Optional str. Whether to normalize the query and key tensors.
             Available options are `None` and `"rms_norm"`. Defaults to `None`.
+        use_dual_attention: bool. Whether to use a dual attention in the
+            block. Defaults to `False`.
         **kwargs: other keyword arguments passed to `keras.layers.Layer`,
             including `name`, `dtype` etc.
     """
@@ -367,6 +439,7 @@ class DismantledBlock(layers.Layer):
         mlp_ratio=4.0,
         use_projection=True,
         qk_norm=None,
+        use_dual_attention=False,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -375,6 +448,7 @@ class DismantledBlock(layers.Layer):
         self.mlp_ratio = mlp_ratio
         self.use_projection = use_projection
         self.qk_norm = qk_norm
+        self.use_dual_attention = use_dual_attention

         head_dim = hidden_dim // num_heads
         self.head_dim = head_dim
@@ -384,7 +458,7 @@ class DismantledBlock(layers.Layer):
         if use_projection:
             self.ada_layer_norm = AdaptiveLayerNormalization(
                 hidden_dim,
-                residual_modulation=True,
+                num_modulations=9 if use_dual_attention else 6,
                 dtype=self.dtype_policy,
                 name="ada_layer_norm",
             )
@@ -395,18 +469,10 @@ class DismantledBlock(layers.Layer):
         self.attention_qkv = layers.Dense(
             hidden_dim * 3, dtype=self.dtype_policy, name="attention_qkv"
         )
-        if qk_norm is not None and qk_norm == "rms_norm":
-            self.q_norm = layers.LayerNormalization(
-                epsilon=1e-6, rms_scaling=True, dtype="float32", name="q_norm"
-            )
-            self.k_norm = layers.LayerNormalization(
-                epsilon=1e-6, rms_scaling=True, dtype="float32", name="q_norm"
-            )
-        elif qk_norm is not None:
-            raise NotImplementedError(
-                "Supported `qk_norm` are `'rms_norm'` and `None`. "
-                f"Received: qk_norm={qk_norm}."
-            )
+        q_norm, k_norm = get_qk_norm(qk_norm)
+        if q_norm is not None:
+            self.q_norm = q_norm
+            self.k_norm = k_norm
         if use_projection:
             self.attention_proj = layers.Dense(
                 hidden_dim, dtype=self.dtype_policy, name="attention_proj"
@@ -426,6 +492,19 @@ class DismantledBlock(layers.Layer):
             name="mlp",
         )

+        if use_dual_attention:
+            self.attention_qkv2 = layers.Dense(
+                hidden_dim * 3, dtype=self.dtype_policy, name="attention_qkv2"
+            )
+            q_norm2, k_norm2 = get_qk_norm(qk_norm, "q_norm2", "k_norm2")
+            if q_norm is not None:
+                self.q_norm2 = q_norm2
+                self.k_norm2 = k_norm2
+            if use_projection:
+                self.attention_proj2 = layers.Dense(
+                    hidden_dim, dtype=self.dtype_policy, name="attention_proj2"
+                )
+
     def build(self, inputs_shape, timestep_embedding):
         self.ada_layer_norm.build(inputs_shape, timestep_embedding)
         self.attention_qkv.build(inputs_shape)
@@ -437,6 +516,13 @@ class DismantledBlock(layers.Layer):
             self.attention_proj.build(inputs_shape)
         self.norm2.build(inputs_shape)
         self.mlp.build(inputs_shape)
+        if self.use_dual_attention:
+            self.attention_qkv2.build(inputs_shape)
+            if self.qk_norm is not None:
+                self.q_norm2.build([None, None, self.num_heads, self.head_dim])
+                self.k_norm2.build([None, None, self.num_heads, self.head_dim])
+            if self.use_projection:
+                self.attention_proj2.build(inputs_shape)

     def _modulate(self, inputs, shift, scale):
         inputs = ops.cast(inputs, self.compute_dtype)
@@ -456,8 +542,12 @@ class DismantledBlock(layers.Layer):
             )
             q, k, v = ops.unstack(qkv, 3, axis=2)
             if self.qk_norm is not None:
-                q = self.q_norm(q, training=training)
-                k = self.k_norm(k, training=training)
+                q = ops.cast(
+                    self.q_norm(q, training=training), self.compute_dtype
+                )
+                k = ops.cast(
+                    self.k_norm(k, training=training), self.compute_dtype
+                )
             return (q, k, v), (inputs, gate_msa, shift_mlp, scale_mlp, gate_mlp)
         else:
             x = self.ada_layer_norm(
@@ -469,8 +559,12 @@ class DismantledBlock(layers.Layer):
             )
             q, k, v = ops.unstack(qkv, 3, axis=2)
             if self.qk_norm is not None:
-                q = self.q_norm(q, training=training)
-                k = self.k_norm(k, training=training)
+                q = ops.cast(
+                    self.q_norm(q, training=training), self.compute_dtype
+                )
+                k = ops.cast(
+                    self.k_norm(k, training=training), self.compute_dtype
+                )
             return (q, k, v)

     def _compute_post_attention(
@@ -495,22 +589,95 @@ class DismantledBlock(layers.Layer):
         )
         return x

+    def _compute_pre_attention_with_dual_attention(
+        self, inputs, timestep_embedding, training=None
+    ):
+        batch_size = ops.shape(inputs)[0]
+        x, gate_msa, shift_mlp, scale_mlp, gate_mlp, x2, gate_msa2 = (
+            self.ada_layer_norm(inputs, timestep_embedding, training=training)
+        )
+        # Compute the main attention
+        qkv = self.attention_qkv(x, training=training)
+        qkv = ops.reshape(
+            qkv, (batch_size, -1, 3, self.num_heads, self.head_dim)
+        )
+        q, k, v = ops.unstack(qkv, 3, axis=2)
+        if self.qk_norm is not None:
+            q = ops.cast(self.q_norm(q, training=training), self.compute_dtype)
+            k = ops.cast(self.k_norm(k, training=training), self.compute_dtype)
+        # Compute the dual attention
+        qkv2 = self.attention_qkv2(x2, training=training)
+        qkv2 = ops.reshape(
+            qkv2, (batch_size, -1, 3, self.num_heads, self.head_dim)
+        )
+        q2, k2, v2 = ops.unstack(qkv2, 3, axis=2)
+        if self.qk_norm is not None:
+            q2 = ops.cast(
+                self.q_norm2(q2, training=training), self.compute_dtype
+            )
+            k2 = ops.cast(
+                self.k_norm2(k2, training=training), self.compute_dtype
+            )
+        return (
+            (q, k, v),
+            (q2, k2, v2),
+            (inputs, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2),
+        )
+
+    def _compute_post_attention_with_dual_attention(
+        self, inputs, inputs2, inputs_intermediates, training=None
+    ):
+        x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2 = (
+            inputs_intermediates
+        )
+        gate_msa = ops.expand_dims(gate_msa, axis=1)
+        shift_mlp = ops.expand_dims(shift_mlp, axis=1)
+        scale_mlp = ops.expand_dims(scale_mlp, axis=1)
+        gate_mlp = ops.expand_dims(gate_mlp, axis=1)
+        gate_msa2 = ops.expand_dims(gate_msa2, axis=1)
+        attn = self.attention_proj(inputs, training=training)
+        x = ops.add(x, ops.multiply(gate_msa, attn))
+        attn2 = self.attention_proj2(inputs2, training=training)
+        x = ops.add(x, ops.multiply(gate_msa2, attn2))
+        x = ops.add(
+            x,
+            ops.multiply(
+                gate_mlp,
+                self.mlp(
+                    self._modulate(self.norm2(x), shift_mlp, scale_mlp),
+                    training=training,
+                ),
+            ),
+        )
+        return x
+
     def call(
         self,
         inputs,
         timestep_embedding=None,
         inputs_intermediates=None,
+        inputs2=None,  # For the dual attention.
         pre_attention=True,
         training=None,
     ):
         if pre_attention:
-            return self._compute_pre_attention(
-                inputs, timestep_embedding, training=training
-            )
+            if self.use_dual_attention:
+                return self._compute_pre_attention_with_dual_attention(
+                    inputs, timestep_embedding, training=training
+                )
+            else:
+                return self._compute_pre_attention(
+                    inputs, timestep_embedding, training=training
+                )
         else:
-            return self._compute_post_attention(
-                inputs, inputs_intermediates, training=training
-            )
+            if self.use_dual_attention:
+                return self._compute_post_attention_with_dual_attention(
+                    inputs, inputs2, inputs_intermediates, training=training
+                )
+            else:
+                return self._compute_post_attention(
+                    inputs, inputs_intermediates, training=training
+                )

     def get_config(self):
         config = super().get_config()
@@ -521,6 +688,7 @@ class DismantledBlock(layers.Layer):
                 "mlp_ratio": self.mlp_ratio,
                 "use_projection": self.use_projection,
                 "qk_norm": self.qk_norm,
+                "use_dual_attention": self.use_dual_attention,
             }
         )
         return config
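A minimal sketch of the merge arithmetic in `_compute_post_attention_with_dual_attention`, written against `keras.ops` with illustrative shapes: both attention branches are gated independently and summed into the residual stream before the MLP.

    from keras import ops

    def merge_dual_attention(x, attn, attn2, gate_msa, gate_msa2):
        # x, attn, attn2: (B, N, D); gate_msa, gate_msa2: (B, D).
        gate_msa = ops.expand_dims(gate_msa, axis=1)
        gate_msa2 = ops.expand_dims(gate_msa2, axis=1)
        x = ops.add(x, ops.multiply(gate_msa, attn))
        return ops.add(x, ops.multiply(gate_msa2, attn2))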
@@ -542,6 +710,8 @@ class MMDiTBlock(layers.Layer):
             layer at the end of the context block.
         qk_norm: Optional str. Whether to normalize the query and key tensors.
             Available options are `None` and `"rms_norm"`. Defaults to `None`.
+        use_dual_attention: bool. Whether to use a dual attention in the
+            block. Defaults to `False`.
         **kwargs: other keyword arguments passed to `keras.layers.Layer`,
             including `name`, `dtype` etc.

@@ -557,6 +727,7 @@ class MMDiTBlock(layers.Layer):
         mlp_ratio=4.0,
         use_context_projection=True,
         qk_norm=None,
+        use_dual_attention=False,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -565,6 +736,7 @@ class MMDiTBlock(layers.Layer):
         self.mlp_ratio = mlp_ratio
         self.use_context_projection = use_context_projection
         self.qk_norm = qk_norm
+        self.use_dual_attention = use_dual_attention

         head_dim = hidden_dim // num_heads
         self.head_dim = head_dim
@@ -576,6 +748,7 @@ class MMDiTBlock(layers.Layer):
             mlp_ratio=mlp_ratio,
             use_projection=True,
             qk_norm=qk_norm,
+            use_dual_attention=use_dual_attention,
             dtype=self.dtype_policy,
             name="x_block",
         )
@@ -602,8 +775,6 @@ class MMDiTBlock(layers.Layer):
         if hasattr(ops, "dot_product_attention") and hasattr(
             keras.config, "is_flash_attention_enabled"
         ):
-            # `ops.dot_product_attention` is slower than the vanilla
-            # implementation in the tensorflow backend.
             encoded = ops.dot_product_attention(
                 query,
                 key,
@@ -643,9 +814,14 @@ class MMDiTBlock(layers.Layer):
             training=training,
         )
         context_len = ops.shape(context_qkv[0])[1]
-        x_qkv, x_intermediates = self.x_block(
-            x, timestep_embedding=timestep_embedding, training=training
-        )
+        if self.x_block.use_dual_attention:
+            x_qkv, x_qkv2, x_intermediates = self.x_block(
+                x, timestep_embedding=timestep_embedding, training=training
+            )
+        else:
+            x_qkv, x_intermediates = self.x_block(
+                x, timestep_embedding=timestep_embedding, training=training
+            )
         q = ops.concatenate([context_qkv[0], x_qkv[0]], axis=1)
         k = ops.concatenate([context_qkv[1], x_qkv[1]], axis=1)
         v = ops.concatenate([context_qkv[2], x_qkv[2]], axis=1)
@@ -656,12 +832,23 @@ class MMDiTBlock(layers.Layer):
         x_attention = attention[:, context_len:]

         # Compute post-attention.
-        x = self.x_block(
-            x_attention,
-            inputs_intermediates=x_intermediates,
-            pre_attention=False,
-            training=training,
-        )
+        if self.x_block.use_dual_attention:
+            q2, k2, v2 = x_qkv2
+            x_attention2 = self._compute_attention(q2, k2, v2)
+            x = self.x_block(
+                x_attention,
+                inputs_intermediates=x_intermediates,
+                inputs2=x_attention2,
+                pre_attention=False,
+                training=training,
+            )
+        else:
+            x = self.x_block(
+                x_attention,
+                inputs_intermediates=x_intermediates,
+                pre_attention=False,
+                training=training,
+            )
         if self.use_context_projection:
             context = self.context_block(
                 context_attention,
@@ -682,6 +869,7 @@ class MMDiTBlock(layers.Layer):
                 "mlp_ratio": self.mlp_ratio,
                 "use_context_projection": self.use_context_projection,
                 "qk_norm": self.qk_norm,
+                "use_dual_attention": self.use_dual_attention,
             }
         )
         return config
@@ -761,6 +949,9 @@ class MMDiT(Backbone):
         qk_norm: Optional str. Whether to normalize the query and key tensors in
             the intermediate blocks. Available options are `None` and
             `"rms_norm"`. Defaults to `None`.
+        dual_attention_indices: Optional tuple. Specifies the indices of
+            the blocks that serve as dual attention blocks. Typically, this is
+            for 3.5 version. Defaults to `None`.
         data_format: `None` or str. If specified, either `"channels_last"` or
             `"channels_first"`. The ordering of the dimensions in the
             inputs. `"channels_last"` corresponds to inputs with shape
@@ -786,6 +977,7 @@ class MMDiT(Backbone):
         context_shape=(None, 4096),
         pooled_projection_shape=(2048,),
         qk_norm=None,
+        dual_attention_indices=None,
         data_format=None,
         dtype=None,
         **kwargs,
@@ -799,6 +991,7 @@ class MMDiT(Backbone):
         image_width = latent_shape[1] // patch_size
         output_dim = latent_shape[-1]
         output_dim_in_final = patch_size**2 * output_dim
+        dual_attention_indices = dual_attention_indices or ()
         data_format = standardize_data_format(data_format)
         if data_format != "channels_last":
             raise NotImplementedError(
@@ -840,6 +1033,7 @@ class MMDiT(Backbone):
                 mlp_ratio,
                 use_context_projection=not (i == num_layers - 1),
                 qk_norm=qk_norm,
+                use_dual_attention=i in dual_attention_indices,
                 dtype=dtype,
                 name=f"joint_block_{i}",
             )
@@ -910,6 +1104,7 @@ class MMDiT(Backbone):
         self.context_shape = context_shape
         self.pooled_projection_shape = pooled_projection_shape
         self.qk_norm = qk_norm
+        self.dual_attention_indices = dual_attention_indices

     def get_config(self):
         config = super().get_config()
@@ -925,6 +1120,7 @@ class MMDiT(Backbone):
                 "context_shape": self.context_shape,
                 "pooled_projection_shape": self.pooled_projection_shape,
                 "qk_norm": self.qk_norm,
+                "dual_attention_indices": self.dual_attention_indices,
             }
         )
         return config
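`dual_attention_indices` is consumed with a simple membership test when the joint blocks are built (`use_dual_attention=i in dual_attention_indices`); an illustrative expansion:

    num_layers = 4
    dual_attention_indices = (0, 1)  # illustrative values
    flags = [i in dual_attention_indices for i in range(num_layers)]
    # -> [True, True, False, False]: only the first two blocks get the
    # second attention branch.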
@@ -205,7 +205,10 @@ class StableDiffusion3Backbone(Backbone):
         mmdit_qk_norm: Optional str. Whether to normalize the query and key
             tensors for each transformer in MMDiT. Available options are `None`
             and `"rms_norm"`. Typically, this is set to `None` for 3.0 version
-            and to `"rms_norm" for 3.5 version.
+            and to `"rms_norm"` for 3.5 version.
+        mmdit_dual_attention_indices: Optional tuple. Specifies the indices of
+            the blocks that serve as dual attention blocks. Typically, this is
+            for 3.5 version. Defaults to `None`.
         vae: The VAE used for transformations between pixel space and latent
             space.
         clip_l: The CLIP text encoder for encoding the inputs.
@@ -253,6 +256,7 @@ class StableDiffusion3Backbone(Backbone):
         mmdit_depth=4,
         mmdit_position_size=192,
         mmdit_qk_norm=None,
+        mmdit_dual_attention_indices=None,
         vae=vae,
         clip_l=clip_l,
         clip_g=clip_g,
@@ -268,6 +272,7 @@ class StableDiffusion3Backbone(Backbone):
         mmdit_num_heads,
         mmdit_position_size,
         mmdit_qk_norm,
+        mmdit_dual_attention_indices,
         vae,
         clip_l,
         clip_g,
@@ -319,6 +324,7 @@ class StableDiffusion3Backbone(Backbone):
             context_shape=context_shape,
             pooled_projection_shape=pooled_projection_shape,
             qk_norm=mmdit_qk_norm,
+            dual_attention_indices=mmdit_dual_attention_indices,
             data_format=data_format,
             dtype=dtype,
             name="diffuser",
@@ -454,6 +460,7 @@ class StableDiffusion3Backbone(Backbone):
         self.mmdit_num_heads = mmdit_num_heads
         self.mmdit_position_size = mmdit_position_size
         self.mmdit_qk_norm = mmdit_qk_norm
+        self.mmdit_dual_attention_indices = mmdit_dual_attention_indices
         self.latent_channels = latent_channels
         self.output_channels = output_channels
         self.num_train_timesteps = num_train_timesteps
@@ -590,6 +597,9 @@ class StableDiffusion3Backbone(Backbone):
                 "mmdit_num_heads": self.mmdit_num_heads,
                 "mmdit_position_size": self.mmdit_position_size,
                 "mmdit_qk_norm": self.mmdit_qk_norm,
+                "mmdit_dual_attention_indices": (
+                    self.mmdit_dual_attention_indices
+                ),
                 "vae": layers.serialize(self.vae),
                 "clip_l": layers.serialize(self.clip_l),
                 "clip_g": layers.serialize(self.clip_g),
@@ -638,7 +648,10 @@ class StableDiffusion3Backbone(Backbone):
             )

         # To maintain backward compatibility, we need to ensure that
-        # `mmdit_qk_norm` is included in the config.
+        # `mmdit_qk_norm` and `mmdit_dual_attention_indices` is included in the
+        # config.
         if "mmdit_qk_norm" not in config:
             config["mmdit_qk_norm"] = None
+        if "mmdit_dual_attention_indices" not in config:
+            config["mmdit_dual_attention_indices"] = None
         return cls(**config)
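The backfill keeps configs saved by older releases loadable; the pattern is equivalent to a `setdefault` on the deserialized dict (the fragment below is illustrative, not a real saved config):

    config = {"mmdit_qk_norm": None}  # older config lacks the new key
    if "mmdit_dual_attention_indices" not in config:
        config["mmdit_dual_attention_indices"] = None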
@@ -82,8 +82,7 @@ class StableDiffusion3Inpaint(Inpaint):

     def fit(self, *args, **kwargs):
         raise NotImplementedError(
-            "Currently, `fit` is not supported for "
-            "`StableDiffusion3Inpaint`."
+            "Currently, `fit` is not supported for `StableDiffusion3Inpaint`."
         )

     def generate_step(
@@ -13,6 +13,18 @@ backbone_presets = {
     },
     "kaggle_handle": "kaggle://keras/stablediffusion3/keras/stable_diffusion_3_medium/4",
     },
+    "stable_diffusion_3.5_medium": {
+        "metadata": {
+            "description": (
+                "3 billion parameter, including CLIP L and CLIP G text "
+                "encoders, MMDiT-X generative model, and VAE autoencoder. "
+                "Developed by Stability AI."
+            ),
+            "params": 3371793763,
+            "path": "stable_diffusion_3",
+        },
+        "kaggle_handle": "kaggle://keras/stablediffusion3/keras/stable_diffusion_3.5_medium/1",
+    },
     "stable_diffusion_3.5_large": {
         "metadata": {
             "description": (
@@ -351,7 +351,7 @@ class ViTEncoder(keras.layers.Layer):
                 attention_dropout=self.attention_dropout,
                 layer_norm_epsilon=self.layer_norm_epsilon,
                 dtype=self.dtype_policy,
-                name=f"tranformer_block_{i+1}",
+                name=f"tranformer_block_{i + 1}",
             )
             encoder_block.build((None, None, self.hidden_dim))
             self.encoder_layers.append(encoder_block)
@@ -150,8 +150,7 @@ class ByteTokenizer(tokenizer.Tokenizer):
     ):
         if not is_int_dtype(dtype):
             raise ValueError(
-                "Output dtype must be an integer type. "
-                f"Received: dtype={dtype}"
+                f"Output dtype must be an integer type. Received: dtype={dtype}"
             )

         # Check normalization_form.
@@ -1,5 +1,7 @@
 import io

+from keras_hub.src.utils.tensor_utils import assert_tf_libs_installed
+
 try:
     import sentencepiece as spm
     import tensorflow as tf
@@ -77,6 +79,7 @@ def compute_sentence_piece_proto(
     tf.Tensor([ 4 8 12 5 9 14 5 6 13 4 7 10 11 6 13],
     shape=(15,), dtype=int32)
     """
+    assert_tf_libs_installed("compute_sentence_piece_proto")

     if spm is None:
         raise ImportError(
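`assert_tf_libs_installed` makes the trainer fail fast with a clear message when TensorFlow and tensorflow-text are missing, instead of a late import error. With the libraries installed the call works as before; a sketch (corpus and vocabulary size are illustrative):

    import tensorflow as tf
    import keras_hub

    data = tf.data.Dataset.from_tensor_slices(["the quick brown fox."])
    proto = keras_hub.tokenizers.compute_sentence_piece_proto(
        data, vocabulary_size=15
    )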
@@ -203,8 +203,7 @@ class UnicodeCodepointTokenizer(tokenizer.Tokenizer):
     ) -> None:
         if not is_int_dtype(dtype):
             raise ValueError(
-                "Output dtype must be an integer type. "
-                f"Received: dtype={dtype}"
+                f"Output dtype must be an integer type. Received: dtype={dtype}"
             )

         # Check normalization_form.
@@ -1,5 +1,6 @@
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.tokenizers.word_piece_tokenizer import pretokenize
+from keras_hub.src.utils.tensor_utils import assert_tf_libs_installed

 try:
     import tensorflow as tf
@@ -117,6 +118,8 @@ def compute_word_piece_vocabulary(
     inputs.map(tokenizer.tokenize)
     ```
     """  # noqa: E501
+    assert_tf_libs_installed("compute_word_piece_vocabulary")
+
     # Read data files.
     if not isinstance(data, (list, tf.data.Dataset)):
         raise ValueError(
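Same pattern here: the guard turns a missing-dependency failure into an immediate, descriptive error. A minimal usage sketch (data and vocabulary size are illustrative):

    import tensorflow as tf
    import keras_hub

    data = tf.data.Dataset.from_tensor_slices(
        ["the quick brown fox", "the earth is round"]
    )
    vocab = keras_hub.tokenizers.compute_word_piece_vocabulary(
        data, vocabulary_size=20
    )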
@@ -59,9 +59,11 @@ def convert_weights(backbone, loader, timm_config):
     num_stacks = len(backbone.stackwise_num_repeats)
     for stack_index in range(num_stacks):
         for block_idx in range(backbone.stackwise_num_repeats[stack_index]):
-            keras_name = f"stack{stack_index+1}_block{block_idx+1}"
+            keras_name = f"stack{stack_index + 1}_block{block_idx + 1}"
             hf_name = (
-                f"features.denseblock{stack_index+1}.denselayer{block_idx+1}"
+                "features."
+                f"denseblock{stack_index + 1}"
+                f".denselayer{block_idx + 1}"
             )
             port_batch_normalization(f"{keras_name}_1_bn", f"{hf_name}.norm1")
             port_conv2d(f"{keras_name}_1_conv", f"{hf_name}.conv1")
@@ -69,8 +71,8 @@ def convert_weights(backbone, loader, timm_config):
         port_conv2d(f"{keras_name}_2_conv", f"{hf_name}.conv2")

     for stack_index in range(num_stacks - 1):
-        keras_transition_name = f"transition{stack_index+1}"
-        hf_transition_name = f"features.transition{stack_index+1}"
+        keras_transition_name = f"transition{stack_index + 1}"
+        hf_transition_name = f"features.transition{stack_index + 1}"
         port_batch_normalization(
             f"{keras_transition_name}_bn", f"{hf_transition_name}.norm"
         )
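For orientation, the names the converter pairs up after this change (index values illustrative):

    stack_index, block_idx = 0, 0
    keras_name = f"stack{stack_index + 1}_block{block_idx + 1}"
    hf_name = (
        "features."
        f"denseblock{stack_index + 1}"
        f".denselayer{block_idx + 1}"
    )
    # keras_name == "stack1_block1"
    # hf_name == "features.denseblock1.denselayer1"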
@@ -268,7 +268,7 @@ def convert_weights(backbone, loader, timm_config):
             # 97 is the start of the lowercase alphabet.
             letter_identifier = chr(block_idx + 97)

-            keras_block_prefix = f"block{stack_index+1}{letter_identifier}_"
+            keras_block_prefix = f"block{stack_index + 1}{letter_identifier}_"
             hf_block_prefix = f"blocks.{stack_index}.{block_idx}."

             if block_type == "v1":
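The letter identifier above comes from the lowercase ASCII range (97 is "a"), so consecutive blocks in a stack map to suffixed prefixes:

    stack_index, block_idx = 0, 1
    letter_identifier = chr(block_idx + 97)  # "b"
    keras_block_prefix = f"block{stack_index + 1}{letter_identifier}_"
    # -> "block1b_"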
@@ -89,7 +89,7 @@ def convert_weights(backbone, loader, timm_config):
         for block_idx in range(backbone.stackwise_num_blocks[stack_index]):
             if version == "v1":
                 keras_name = f"stack{stack_index}_block{block_idx}"
-                hf_name = f"layer{stack_index+1}.{block_idx}"
+                hf_name = f"layer{stack_index + 1}.{block_idx}"
             else:
                 keras_name = f"stack{stack_index}_block{block_idx}"
                 hf_name = f"stages.{stack_index}.blocks.{block_idx}"
@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export

 # Unique source of truth for the version number.
-__version__ = "0.19.0.dev202501080345"
+__version__ = "0.19.0.dev202501150344"


 @keras_hub_export("keras_hub.version")
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: keras-hub-nightly
-Version: 0.19.0.dev202501080345
+Version: 0.19.0.dev202501150344
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-hub
 Author: Keras team
@@ -31,6 +31,17 @@ Requires-Dist: tensorflow-text
 Provides-Extra: extras
 Requires-Dist: rouge-score; extra == "extras"
 Requires-Dist: sentencepiece; extra == "extras"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

 # KerasHub: Multi-framework Pretrained Models
 [![](https://github.com/keras-team/keras-hub/workflows/Tests/badge.svg?branch=master)](https://github.com/keras-team/keras-hub/actions?query=workflow%3ATests+branch%3Amaster)
@@ -9,7 +9,7 @@ keras_hub/api/tokenizers/__init__.py,sha256=mtJgQy1spfQnPAkeLoeinsT_W9iCWHlJXwzc
 keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version_utils.py,sha256=PULLssHhM5UCqNcvlGVVyAQo9rDMkhLThc6DLU9Kz2g,222
+keras_hub/src/version_utils.py,sha256=XXUJ1oMuODMzez6Sqr-8PGIem4zG0YD-78PlkMxNEXI,222
 keras_hub/src/bounding_box/__init__.py,sha256=7i6KnGupN4AVivR_dFjQyuuTbI0GkHy8d-aMXeqZdU8,95
 keras_hub/src/bounding_box/converters.py,sha256=UUp1hwegpDZyIo8sh9TLNy1v6JjwmvwzL6wmHFMAtbk,21916
 keras_hub/src/bounding_box/formats.py,sha256=YmskOz2BOSat7NaE__J9VfpSNGPJJR0znSzA4lp8MMI,3868
@@ -43,7 +43,7 @@ keras_hub/src/layers/preprocessing/random_deletion.py,sha256=x23nRo0ir2J4Ps42i9X
 keras_hub/src/layers/preprocessing/random_swap.py,sha256=w2z7yNQsII5g4sEFi4GXfgxIc1S6UUt3a8YWZew_f4U,9504
 keras_hub/src/layers/preprocessing/start_end_packer.py,sha256=lY2K937z6JucxNe7VknynhhjrcUfFigU6mqIdv2gS-Y,7973
 keras_hub/src/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-keras_hub/src/metrics/bleu.py,sha256=N4hnlCIFk558nZAHxGlzLYBx6gwpWS3Wvw1iFM69xiA,13665
+keras_hub/src/metrics/bleu.py,sha256=pnid5azpAxO6vKEfUtAby3nH29OGbwYKgVGOGeoaA3I,13694
 keras_hub/src/metrics/edit_distance.py,sha256=kjhe8uNjvv8aN49RyrKAbNi7a8_OlB8fMza0J_CfNQg,6353
 keras_hub/src/metrics/perplexity.py,sha256=dDUQcfE5JbAruG3spEkgue6IjHcynqgmGpJAqWg22Tw,6139
 keras_hub/src/metrics/rouge_base.py,sha256=Pt2DUznhTTeR-fX1nQ_wSbPtmuTgxQTvrGpu8LRVapE,6264
@@ -87,7 +87,7 @@ keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py,sha256=3_e-ULIcm_3DK
 keras_hub/src/models/bart/bart_tokenizer.py,sha256=Q7IXmIwXzhPSN427oQRyF9ufoExQGS184Yo_4boaOZo,2811
 keras_hub/src/models/basnet/__init__.py,sha256=4N6XvIUYYJl5xtoaL3_9fawUX_qP3WmTYNEEU7tn8Gw,253
 keras_hub/src/models/basnet/basnet.py,sha256=JA58Q9lmygdSOm5MUaPAlaL6B8XnmqCcRaGrk9c8P3Q,4287
-keras_hub/src/models/basnet/basnet_backbone.py,sha256=t_52WW6jetONS7AnPf9YsiMLDqOjVwjNuayQEv6ZAk4,13503
+keras_hub/src/models/basnet/basnet_backbone.py,sha256=P-jogkYIu9j7_28fl2RFQRMl87BXz1wcY_LtIrxBy1E,13505
 keras_hub/src/models/basnet/basnet_image_converter.py,sha256=DwzAwtZeggYw_qyRQ-Abnnm885Wobv3wClxRzOTscI0,342
 keras_hub/src/models/basnet/basnet_preprocessor.py,sha256=uM504utaXODSqR5zpKnopRuaV_l84zCg06RkNoNSKIs,510
 keras_hub/src/models/basnet/basnet_presets.py,sha256=z6tR2q_EvYnUmGfsWIWYfmR_8gvWYPH3QmtpAu_T8f8,63
@@ -135,11 +135,11 @@ keras_hub/src/models/deeplab_v3/__init__.py,sha256=FHAUPM4a1DJj4EsNTbYEd1riNq__u
 keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py,sha256=dH7HHu_NAnE-HP6ivOL7fFLQZHt_MWmehlMccLljhPc,7764
 keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py,sha256=mRkH3HdhpV0fCcQcVXEvIX7SNk-bAMb3SAHzgK-FD5c,371
 keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py,sha256=hR9S6lNYamY0EBDBo3e1qTCiwtftmLXrN-UYuzfw5Io,581
-keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py,sha256=qmEiolOOriLAojXB67xXW9IOo717kaCGeDVZJLaGY98,7834
+keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py,sha256=mz9nG55gdXSTDE96AXgeTCwUFB95DIpTuqrvWIt5Lco,7840
 keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py,sha256=ZKYY8A7mV2QvwXwjDUd9xAbVHo58-Hgj_IqNUbuyCIU,625
 keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py,sha256=pubi30sPJKLOpz9fRQff2FZt_53KBvwf2uyaJ5YL7J8,3726
 keras_hub/src/models/densenet/__init__.py,sha256=r7StyamnWeeZxOk9r4ZYNbS_YVhu9YGPyXhNxljvdPg,269
-keras_hub/src/models/densenet/densenet_backbone.py,sha256=5QawyB4EhyaXpmm8l_QUYveU7kEet3jRD3s94XAz8Tw,6738
+keras_hub/src/models/densenet/densenet_backbone.py,sha256=f2nfsXyXQert2aYHq-L-JZtp8inq1fs1K47rzZQ9nTI,6744
 keras_hub/src/models/densenet/densenet_image_classifier.py,sha256=ye-Ix3oU42pfsDoh-h1PG4di1kzldO0ZO7Nj304p_X4,544
 keras_hub/src/models/densenet/densenet_image_classifier_preprocessor.py,sha256=xDZbTw_h6pjLDzf8QmbDyMnMsFzgh-dPX1ldg9kddhg,563
 keras_hub/src/models/densenet/densenet_image_converter.py,sha256=DoxYlJVZ9uaabFhVjWOmzvhONoc8KNcQj2vQ6Z1AUpU,354
@@ -186,7 +186,7 @@ keras_hub/src/models/flux/flux_layers.py,sha256=wevcAEbayBD8bVm-21FBi2LQ13pZzB99
 keras_hub/src/models/flux/flux_maths.py,sha256=2pnHW8HW7V2JZ8HIrUwE-UU4klpFQaOkoAvG5nWVfyY,7502
 keras_hub/src/models/flux/flux_model.py,sha256=K92PyeFHIp8SwXuxhv__XCEaQ2wqSW1jOb97I4S24Rw,8991
 keras_hub/src/models/flux/flux_presets.py,sha256=z7C_FbI1_F5YETXuWpc7Yh_0w-5N0eBQy6Oks_X9W88,54
-keras_hub/src/models/flux/flux_text_to_image.py,sha256=mI_QxOzjXl3b5s7Q1LZemceCdeboqPD5ilEPEEyer40,4169
+keras_hub/src/models/flux/flux_text_to_image.py,sha256=Rf5dD2EhG0bE8Gyg9sqaA8YEexS1kdraofIkxiZDjvc,4166
 keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=Fs9jr97QtmRUbRRz1kITpkuhDM2GoV3n0XSFC-qQA14,2252
 keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
 keras_hub/src/models/gemma/gemma_attention.py,sha256=1CVN5z9GKoU8TuNMih2_MweDkpd98xSqdic9F8xIBE8,8317
@@ -257,7 +257,7 @@ keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7
 keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
 keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=24ABQ1vGlppV-KfWh0YqJjzM_Lu2GIwvyJ4X2XXie_A,5616
 keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py,sha256=5yM_jUtrFsWIieiwfFBoP7mtPmQAwywkeLKbd7fhmzk,371
-keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=O648iwzs0wooiQCfDQ-n0wOtzIOEDGXRSwSb_Brx2Ck,8985
+keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=Ka1ChUUSKw-yY2th3QtmNtkeXt0krYfwhkHrScioMls,8979
 keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py,sha256=ljTiADHo0Ok88q-jVzwJIle2C8xcxnudLTsBLzIySaM,2415
 keras_hub/src/models/pali_gemma/pali_gemma_vit.py,sha256=ViPKfGksbxBGJ3iS3M_KWxRc8Ie4LF7rWWUKDiqECJE,18285
 keras_hub/src/models/phi3/__init__.py,sha256=zIbf1MU-ks91mEkjTRJAsk51N3BBnXDF2JM1vO-13PQ,245
@@ -271,7 +271,7 @@ keras_hub/src/models/phi3/phi3_presets.py,sha256=sb2ce7Gq1OikFEf2KIYG69rFKHYKj8q
 keras_hub/src/models/phi3/phi3_rotary_embedding.py,sha256=wqiRn8nETNcLc5Vsm_d_8s11Ro6ibWZbWvODdLqIOo4,5013
 keras_hub/src/models/phi3/phi3_tokenizer.py,sha256=bOPH14wTVVHJHq8mgzXLjsgvKMNhfO8eayevAPpjYVA,1992
 keras_hub/src/models/resnet/__init__.py,sha256=C5UqlQ6apm8WSp1bnrxB6Bi3BGaknxRQs-r3b2wpaGA,257
-keras_hub/src/models/resnet/resnet_backbone.py,sha256=3acTjdWbnos8l_TPxYLgoV3Y4V_vJ_o1AqGhiQu459k,31274
+keras_hub/src/models/resnet/resnet_backbone.py,sha256=Q7nlqcTXZzjqd0e-DsjHC4ok58yOX7qxseotym3uZpM,31276
 keras_hub/src/models/resnet/resnet_image_classifier.py,sha256=nf35EKDzvBkfhHsK-s6Ks0nbhvKO7HEOYZm94YckyWE,510
 keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py,sha256=fM7gyQ0qB-RRuI4USJkRD6q9-HVfuC71e-BLTo-UhHQ,543
 keras_hub/src/models/resnet/resnet_image_converter.py,sha256=fgTxihJznGFss-y3Z-jp0JE3X1gaaB2y-f2KMwrT8Pk,342
@@ -279,7 +279,7 @@ keras_hub/src/models/resnet/resnet_presets.py,sha256=cryfXlC_FSEN_jrexKIh5aVbzp8
 keras_hub/src/models/retinanet/__init__.py,sha256=veWIFvMN6151M69l7FvTcI-IIEe_8dLmNO5NLOszQ1c,275
 keras_hub/src/models/retinanet/anchor_generator.py,sha256=0OgKSW3OKmbc0cOPHF6FYTAzn7fcHklg665PGSwAaDM,6504
 keras_hub/src/models/retinanet/box_matcher.py,sha256=l820r1R-ByqiyVgmZ0YFjjz0njchDda-wItzLn1X84o,10834
-keras_hub/src/models/retinanet/feature_pyramid.py,sha256=VxLcOEjJSXIDu30oMcZEYdVlpHaOP3IutZNwh0N3uHQ,17604
+keras_hub/src/models/retinanet/feature_pyramid.py,sha256=hbdrj6X-D2SlwOp2h1WcBlTdSAlLmFK43X7OrkJRoMA,17614
 keras_hub/src/models/retinanet/non_max_supression.py,sha256=PMOLlRw-EnyEmhlUhJjEbHf1xXiplN95pUxQbiJQbN4,20996
 keras_hub/src/models/retinanet/prediction_head.py,sha256=xWHt21-SS2t7vCmTONlR1lSbJXhml5jx68V8MGbGybg,7863
 keras_hub/src/models/retinanet/retinanet_backbone.py,sha256=BJBPJLxpOCOU0Br7b4JsgCZBHQHLAhxLqo9BHNIsl1g,5659
@@ -314,11 +314,11 @@ keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py,sha256=
 keras_hub/src/models/segformer/segformer_presets.py,sha256=ET39ospixkTaCsjoMLdJrr3wlGvTAQu5prleVC5lMZI,4793
 keras_hub/src/models/stable_diffusion_3/__init__.py,sha256=ZKYQuaRObyhKq8GVAHmoRvlXp6FpU8ChvutVCHyXKuc,343
 keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py,sha256=vtVhieAv277mAiZj7Kvvqg_Ba7klfQxZVk4PPxNNQ0s,3062
-keras_hub/src/models/stable_diffusion_3/mmdit.py,sha256=poJlz-xt06hgOtn_Bw5YQDxZtDBc9L4Vo0ahhGwPly4,33340
-keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py,sha256=u0Wwtbl5b-1z_vn07TRw4jpkVYrReZeHbWqQIrZjyCA,23368
+keras_hub/src/models/stable_diffusion_3/mmdit.py,sha256=0gq2tcIqcbiGKKDDj3vrRsF67U3qE9g706XPs2BfCOY,40979
+keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py,sha256=w8lsMampk34M9xQi96mEnXmkaKQqFQtoFTW8zP7ilEA,24078
 keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py,sha256=oQcVCWOwrdUTrr_JNekoMqdSlKYMGz5tG6v8uD25lTc,5479
-keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py,sha256=aZMIC-GYjLhdU_yM7fJEznApCo1zwRAgwQbW0tCW0xY,6399
-keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py,sha256=z6wrfv8rCqLBzn7_edRcKCIDQRTNUgLqyr-LLp55-IE,1680
+keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py,sha256=t4uw920Jn1k80air3WRGimKf71aMVu6q73oWFH348vk,6384
+keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py,sha256=x7Ez4L955MJE4ABtBy-63YpU9XpR0Ro8QWPzYYJs1yE,2167
 keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py,sha256=Yt-UIatVKANjjKFCFEj1rIHhOrt8hqefKKQJIAWcTLc,4567
 keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py,sha256=m5PdVSgTcYuqd7jOQ8wD4PAnMa7wY2WdhwpK3hdydhM,2756
 keras_hub/src/models/stable_diffusion_3/t5_encoder.py,sha256=oV7P1uwCKdGiD93zXq7kmqX0elMZQU4UvBa8wg6P1hs,5113
@@ -344,7 +344,7 @@ keras_hub/src/models/vit/vit_backbone.py,sha256=kGmRZO4u-1q4PBcbhJbiWVIEVYAcp2H4
 keras_hub/src/models/vit/vit_image_classifier.py,sha256=lMVxiD1_6drx7XQ7P7YzlqnFP7kT1zlMe84f-T3SDQI,6332
 keras_hub/src/models/vit/vit_image_classifier_preprocessor.py,sha256=wu6YcBlXMWB9sKCPvmNdGBZKTLQt_HyHWS6P9nyDwsk,504
 keras_hub/src/models/vit/vit_image_converter.py,sha256=5xVF04BzMcdTDc6aErAYj3_BuGmVd3zoJMcH1ho4T0g,2561
-keras_hub/src/models/vit/vit_layers.py,sha256=s4j3n3qnJnv6W9AdUkNsO3Vsi_BhxEGECYkaLVCU6XY,13238
+keras_hub/src/models/vit/vit_layers.py,sha256=Zsz-ARPY49S1nXLUtpFwtPfw31D-vCtKesEo_2JIKPA,13240
 keras_hub/src/models/vit/vit_presets.py,sha256=zZhxUleOom1ie3gn0Mi-_xhhdFEEsnqSQyKADV2L38k,4479
 keras_hub/src/models/vit_det/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/models/vit_det/vit_det_backbone.py,sha256=DOZ5J7c1t5PAZ6y0pMmBoQTMOUup7UoUrYVfCs69ltY,7697
@@ -383,13 +383,13 @@ keras_hub/src/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 keras_hub/src/tests/test_case.py,sha256=oGWoUhlKgjVMNIjvUVnQR-k5iKvodztHsFMOs669Trw,27402
 keras_hub/src/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/tokenizers/byte_pair_tokenizer.py,sha256=WeUlHMAf5y_MUjFIfVhEcFoOZu-z4kkSj-Dq-pegM9w,24052
-keras_hub/src/tokenizers/byte_tokenizer.py,sha256=c1a41eVuLzGmBtscQ0RxPIqFi41m_604KJ9fdpPR7Sc,10437
+keras_hub/src/tokenizers/byte_tokenizer.py,sha256=GPIKaddXugbfckfhodADsBpaYb72DgFMs_xfXHnK4qU,10418
 keras_hub/src/tokenizers/sentence_piece_tokenizer.py,sha256=nOqkpa2nHitITpdowPHdwxiN87e8huLW8Dt2gozVnhI,9350
-keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py,sha256=LhUxwcaDKt5V58DBzK9Sh4D-hOL80SHGpL4LavWbq74,4642
+keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py,sha256=caqgV9N4lH97zBviFPdpwo_O95AaJBEJLQv6Icq3Hs8,4774
 keras_hub/src/tokenizers/tokenizer.py,sha256=v0Ka5ayrBwpsGBlkIadXK-b4RsMTbhV6BZrvKullbxY,9722
-keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=KxuVsUx3ntGsuqaQ-gnFWFfoVLsl5Hag7rBk6xfq-fQ,13572
+keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0ZttSOv_M89sMaFpvmllojvKz_ac,13553
 keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
-keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=Zz1SGgArykxBVWnS5YV-ViqyMOrw3j3i_i_jto96zCg,6610
+keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
 keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/utils/keras_utils.py,sha256=0yKIfFuO_IqAH8vHbG3ncRmCVKg__xRGfQtLYWZ8YuA,1695
 keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
@@ -399,9 +399,9 @@ keras_hub/src/utils/tensor_utils.py,sha256=YVJesN91bk-OzJXY1mOKBppuY8noBU7zhPQNX
 keras_hub/src/utils/imagenet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/utils/imagenet/imagenet_utils.py,sha256=MvIvv1WJo51ZXBxy4S7t_DsN3ZMtJWlC4cmRvKM2kIA,39304
 keras_hub/src/utils/timm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-keras_hub/src/utils/timm/convert_densenet.py,sha256=V-GRjWuDnlh3b1EMxqahwZ3GMwSgOa3v0HOfb2ZZ-d0,3342
-keras_hub/src/utils/timm/convert_efficientnet.py,sha256=wkOKTLS_N_VKy1CQQGjSlD_TPSOOmCMMXQvbjravN6g,17098
-keras_hub/src/utils/timm/convert_resnet.py,sha256=ee8eTml0ffJKE8avzGoLFcpjPF63DsvoIUArAGa8Ngg,5832
+keras_hub/src/utils/timm/convert_densenet.py,sha256=fu8HBIQis5o3ib2tyI2qnmYScVrVIQySok8vTfa1qJ8,3393
+keras_hub/src/utils/timm/convert_efficientnet.py,sha256=SgEIlyyinS04qoQpEgh3WazHq544zNUCCpfmWh3EjSs,17100
+keras_hub/src/utils/timm/convert_resnet.py,sha256=8JFkVtdpy5z9h83LJ97rD-a8FRejXPZvMNksNuStqjM,5834
 keras_hub/src/utils/timm/convert_vgg.py,sha256=MT5jGnLrzenPpe66Af_Lp1IdR9KGtsSrcmn6_UPqHvQ,2419
 keras_hub/src/utils/timm/preset_loader.py,sha256=cdZDjthZdTD2myMOenQar4ACyi7VTuIzNRg24LuqS-4,3374
 keras_hub/src/utils/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -417,7 +417,7 @@ keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYum
 keras_hub/src/utils/transformers/convert_vit.py,sha256=9SUZ9utNJhW_5cj3acMn9cRy47u2eIcDsrhmzj77o9k,5187
 keras_hub/src/utils/transformers/preset_loader.py,sha256=DgGJXbTSB9Na8FIR-YWWVqQPOFxHwWrGm41EwcS_EFs,3797
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
-keras_hub_nightly-0.19.0.dev202501080345.dist-info/METADATA,sha256=WWsYYpkd-P_ryoA3jId3bNDKaMQOJfy7eBeYQ7N_D6w,7260
-keras_hub_nightly-0.19.0.dev202501080345.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
-keras_hub_nightly-0.19.0.dev202501080345.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub_nightly-0.19.0.dev202501080345.dist-info/RECORD,,
+keras_hub_nightly-0.19.0.dev202501150344.dist-info/METADATA,sha256=FhbHeGMBpOfmdE1bEoJdl34xtvV3n85LqFH_5STUyUo,7498
+keras_hub_nightly-0.19.0.dev202501150344.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+keras_hub_nightly-0.19.0.dev202501150344.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.19.0.dev202501150344.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.7.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any