keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +1 -0
- keras_hub/api/models/__init__.py +11 -6
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/converters.py +2 -2
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/rms_normalization.py +8 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
- keras_hub/src/metrics/bleu.py +1 -1
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/bert/bert_presets.py +4 -2
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/causal_lm.py +19 -15
- keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
- keras_hub/src/models/densenet/densenet_backbone.py +3 -1
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +6 -6
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/cba.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
- keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
- keras_hub/src/models/efficientnet/mbconv.py +1 -1
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/flux/flux_layers.py +46 -44
- keras_hub/src/models/flux/flux_maths.py +24 -17
- keras_hub/src/models/flux/flux_model.py +24 -19
- keras_hub/src/models/flux/flux_presets.py +2 -1
- keras_hub/src/models/flux/flux_text_to_image.py +7 -3
- keras_hub/src/models/gemma/gemma_backbone.py +27 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +9 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier_preprocessor.py +4 -1
- keras_hub/src/models/image_object_detector.py +2 -2
- keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
- keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
- keras_hub/src/models/llama/llama_backbone.py +34 -26
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/mit_backbone.py +4 -3
- keras_hub/src/models/mit/mit_layers.py +2 -1
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +2 -2
- keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
- keras_hub/src/models/retinanet/prediction_head.py +2 -2
- keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
- keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
- keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +4 -2
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/sam_backbone.py +2 -2
- keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/segformer_backbone.py +18 -14
- keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
- keras_hub/src/models/segformer/segformer_presets.py +24 -12
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/task.py +4 -2
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +5 -1
- keras_hub/src/models/vae/vae_layers.py +0 -1
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +49 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +2 -2
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
- keras_hub/src/utils/preset_utils.py +25 -18
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
keras_hub/src/models/flux/flux_layers.py
@@ -9,11 +9,10 @@ from keras_hub.src.models.flux.flux_maths import rearrange_symbolic_tensors
 
 
 class EmbedND(keras.Model):
-    """
-    Embedding layer for N-dimensional inputs using Rotary Positional Embedding (RoPE).
+    """Embedding layer for N-dimensional inputs using RoPE.
 
-    This layer applies RoPE embeddings across multiple axes of the input tensor
-    concatenates the embeddings along a specified axis.
+    This layer applies RoPE embeddings across multiple axes of the input tensor
+    and concatenates the embeddings along a specified axis.
 
     Args:
         theta. Rotational angle parameter for RoPE.
@@ -32,14 +31,14 @@ class EmbedND(keras.Model):
             self.rope.build((input_shape[:-1] + (self.axes_dim[i],)))
 
     def call(self, ids):
-        """
-        Computes the positional embeddings for each axis and concatenates them.
+        """Computes the positional embeddings for each axis and concatenates.
 
         Args:
            ids: KerasTensor. Input tensor of shape (..., num_axes).
 
         Returns:
-            KerasTensor: Positional embeddings of shape
+            KerasTensor: Positional embeddings of shape
+                (..., concatenated_dim, 1, ...).
         """
         n_axes = ids.shape[-1]
         emb = ops.concatenate(
@@ -54,8 +53,7 @@ class EmbedND(keras.Model):
 
 
 class MLPEmbedder(keras.Model):
-    """
-    A simple multi-layer perceptron (MLP) embedder model.
+    """A simple multi-layer perceptron (MLP) embedder model.
 
     This model applies a linear transformation followed by the SiLU activation
     function and another linear transformation to the input tensor.
@@ -76,15 +74,14 @@ class MLPEmbedder(keras.Model):
         self.output_layer.build((input_shape[0], self.input_layer.units))
 
     def call(self, x):
-        """
-        Applies the MLP embedding to the input tensor.
+        """Applies the MLP embedding to the input tensor.
 
         Args:
-            x:
+            x: Input tensor of shape (batch_size, in_dim).
 
         Returns:
-
-
+            Output tensor of shape (batch_size, hidden_dim) after applying the
+            MLP transformations.
         """
         x = self.input_layer(x)
         x = self.silu(x)
@@ -92,11 +89,10 @@ class MLPEmbedder(keras.Model):
 
 
 class QKNorm(keras.layers.Layer):
-    """
-    A layer that applies RMS normalization to query and key tensors.
+    """A layer that applies RMS normalization to query and key tensors.
 
-    This layer normalizes the input query and key tensors using separate
-    layers for each.
+    This layer normalizes the input query and key tensors using separate
+    RMSNormalization layers for each.
 
     Args:
         input_dim. The dimensionality of the input query and key tensors.
@@ -120,7 +116,8 @@ class QKNorm(keras.layers.Layer):
             k: KerasTensor. The key tensor of shape (batch_size, input_dim).
 
         Returns:
-            tuple[KerasTensor, KerasTensor]: A tuple containing the normalized
+            tuple[KerasTensor, KerasTensor]: A tuple containing the normalized
+                query and key tensors.
         """
         q = self.query_norm(q)
         k = self.key_norm(k)
@@ -128,17 +125,17 @@ class QKNorm(keras.layers.Layer):
 
 
 class SelfAttention(keras.Model):
-    """
-    Multi-head self-attention layer with RoPE embeddings and RMS normalization.
+    """Multi-head self-attention layer with RoPE and RMS normalization.
 
     This layer performs self-attention over the input sequence and applies RMS
-    normalization to the query and key tensors before computing the attention
+    normalization to the query and key tensors before computing the attention
+    scores.
 
     Args:
         dim: int. Dimensionality of the input tensor.
         num_heads: int. Number of attention heads. Default is 8.
-        use_bias: bool. Whether to use bias in the query, key, value projection
-            Default is False.
+        use_bias: bool. Whether to use bias in the query, key, value projection
+            layers. Default is False.
     """
 
     def __init__(self, dim, num_heads=8, use_bias=False):
@@ -159,12 +156,12 @@ class SelfAttention(keras.Model):
         self.proj.build((None, input_shape[1], input_shape[-1]))
 
     def call(self, x, positional_encoding):
-        """
-        Applies self-attention with RoPE embeddings.
+        """Applies self-attention with RoPE embeddings.
 
         Args:
             x: KerasTensor. Input tensor of shape (batch_size, seq_len, dim).
-            positional_encoding: KerasTensor. Positional encoding tensor for
+            positional_encoding: KerasTensor. Positional encoding tensor for
+                RoPE.
 
         Returns:
             KerasTensor: Output tensor after self-attention and projection.
@@ -180,12 +177,11 @@ class SelfAttention(keras.Model):
 
 
 class Modulation(keras.Model):
-    """
-    Modulation layer that produces shift, scale, and gate tensors.
+    """Modulation layer that produces shift, scale, and gate tensors.
 
-    This layer applies a SiLU activation to the input tensor followed by a
-    transformation to generate modulation parameters. It can optionally
-    sets of modulation parameters.
+    This layer applies a SiLU activation to the input tensor followed by a
+    linear transformation to generate modulation parameters. It can optionally
+    generate two sets of modulation parameters.
 
     Args:
         dim: int. Dimensionality of the modulation output.
@@ -212,8 +208,9 @@ class Modulation(keras.Model):
             x: KerasTensor. Input tensor.
 
         Returns:
-            tuple[ModulationOut, ModulationOut | None]: A tuple containing
-                scale, and gate tensors. If `double` is True, returns two
+            tuple[ModulationOut, ModulationOut | None]: A tuple containing the
+                shift, scale, and gate tensors. If `double` is True, returns two
+                sets of modulation parameters.
         """
         x = keras.layers.Activation("silu")(x)
         out = self.linear_projection(x)
@@ -239,8 +236,10 @@ class DoubleStreamBlock(keras.Model):
     Args:
         hidden_size: int. The hidden dimension size for the model.
         num_heads: int. The number of attention heads.
-        mlp_ratio: float. The ratio of the MLP hidden dimension to the
-
+        mlp_ratio: float. The ratio of the MLP hidden dimension to the hidden
+            size.
+        use_bias: bool, optional. Whether to include bias in QKV projection.
+            Default is False.
     """
 
     def __init__(
@@ -292,13 +291,13 @@ class DoubleStreamBlock(keras.Model):
         Forward pass for the DoubleStreamBlock.
 
         Args:
-            image:
-            text:
-            modulation_encoding:
-            positional_encoding:
+            image: Input image tensor.
+            text: Input text tensor.
+            modulation_encoding: Modulation vector.
+            positional_encoding: Positional encoding tensor.
 
         Returns:
-
+            A `(image, text)` tuple of modified image and text tensors.
         """
         image_mod1, image_mod2 = self.image_mod(modulation_encoding)
         text_mod1, text_mod2 = self.text_mod(modulation_encoding)
@@ -367,8 +366,10 @@ class SingleStreamBlock(keras.Model):
     Args:
         hidden_size: int. The hidden dimension size for the model.
         num_heads: int. The number of attention heads.
-        mlp_ratio: float, optional. The ratio of the MLP hidden dimension to the
-
+        mlp_ratio: float, optional. The ratio of the MLP hidden dimension to the
+            hidden size. Default is 4.0.
+        qk_scale: float, optional. Scaling factor for the query-key product.
+            Default is None.
     """
 
     def __init__(
@@ -443,7 +444,8 @@ class SingleStreamBlock(keras.Model):
         attn = self.attention(
             q, k=k, v=v, positional_encoding=positional_encoding
         )
-        # compute activation in mlp stream, cat again and run second linear
+        # compute activation in mlp stream, cat again and run second linear
+        # layer
         output = self.linear2(
             ops.concatenate(
                 (attn, keras.activations.gelu(mlp, approximate=True)), 2
keras_hub/src/models/flux/flux_maths.py
@@ -3,19 +3,21 @@ from keras import ops
 
 
 class TimestepEmbedding(keras.layers.Layer):
-    """
-    Creates sinusoidal timestep embeddings.
+    """Creates sinusoidal timestep embeddings.
 
     Call arguments:
-        t:
+        t: Tensor of shape (N,), representing N indices, one per batch element.
            These values may be fractional.
        dim: int. The dimension of the output.
-        max_period: int, optional. Controls the minimum frequency of the
-
+        max_period: int, optional. Controls the minimum frequency of the
+            embeddings. Defaults to 10000.
+        time_factor: float, optional. A scaling factor applied to `t`. Defaults
+            to 1000.0.
 
     Returns:
-
-
+        A tensor of shape (N, D) representing the positional embeddings,
+        where N is the number of batch elements and D is the specified
+        dimension `dim`.
     """
 
     def call(self, t, dim, max_period=10000, time_factor=1000.0):
@@ -68,7 +70,8 @@ class ApplyRoPE(keras.layers.Layer):
     Call arguments:
         xq: KerasTensor. The query tensor of shape (..., L, D).
         xk: KerasTensor. The key tensor of shape (..., L, D).
-        freqs_cis: KerasTensor. The frequency complex numbers tensor with shape
+        freqs_cis: KerasTensor. The frequency complex numbers tensor with shape
+            `(..., 2)`.
 
     Returns:
         tuple[KerasTensor, KerasTensor]: The transformed query and key tensors.
@@ -91,12 +94,12 @@
 
 
 class FluxRoPEAttention(keras.layers.Layer):
-    """
-    Computes the attention mechanism with the RoPE transformation applied to the query and key tensors.
+    """Computes the attention mechanism with RoPE.
 
     Args:
         dropout_p: float, optional. Dropout probability. Defaults to 0.0.
-        is_causal: bool, optional. If True, applies causal masking. Defaults to
+        is_causal: bool, optional. If True, applies causal masking. Defaults to
+            False.
 
     Call arguments:
         q: KerasTensor. Query tensor of shape (..., L, D).
@@ -122,12 +125,14 @@ class FluxRoPEAttention(keras.layers.Layer):
             q, k, v, dropout_p=self.dropout_p, is_causal=self.is_causal
         )
         x = ops.transpose(x, (0, 2, 1, 3))
-        b,
-        return ops.reshape(x, (b,
+        b, s, h, d = ops.shape(x)
+        return ops.reshape(x, (b, s, h * d))
 
 
-# TODO: This is probably already implemented in several places, but is needed to
-#
+# TODO: This is probably already implemented in several places, but is needed to
+# ensure numeric equivalence to the original implementation. It uses
+# torch.functional.scaled_dot_product_attention() - do we have an equivalent
+# already in Keras?
 def scaled_dot_product_attention(
     query,
     key,
@@ -144,9 +149,11 @@ def scaled_dot_product_attention(
         query: KerasTensor. Query tensor of shape (..., L, D).
         key: KerasTensor. Key tensor of shape (..., S, D).
         value: KerasTensor. Value tensor of shape (..., S, D).
-        attn_mask: KerasTensor, optional. Attention mask tensor. Defaults to
+        attn_mask: KerasTensor, optional. Attention mask tensor. Defaults to
+            None.
         dropout_p: float, optional. Dropout probability. Defaults to 0.0.
-        is_causal: bool, optional. If True, applies causal masking. Defaults to
+        is_causal: bool, optional. If True, applies causal masking. Defaults to
+            False.
         scale: float, optional. Scale factor for attention. Defaults to None.
 
     Returns:
keras_hub/src/models/flux/flux_model.py
@@ -12,41 +12,47 @@ from keras_hub.src.models.flux.flux_maths import TimestepEmbedding
 
 @keras_hub_export("keras_hub.models.FluxBackbone")
 class FluxBackbone(Backbone):
-    """
-    Transformer model for flow matching on sequences.
+    """Transformer model for flow matching on sequences.
+
+    The model processes image and text data with associated positional and
+    timestep embeddings, and optionally applies guidance embedding.
+    Double-stream blocks handle separate image and text streams, while
+    single-stream blocks combine these streams. Ported from:
+    https://github.com/black-forest-labs/flux
 
-    The model processes image and text data with associated positional and timestep
-    embeddings, and optionally applies guidance embedding. Double-stream blocks
-    handle separate image and text streams, while single-stream blocks combine
-    these streams. Ported from: https://github.com/black-forest-labs/flux
 
     Args:
         input_channels: int. The number of input channels.
-        hidden_size: int. The hidden size of the transformer, must be divisible
+        hidden_size: int. The hidden size of the transformer, must be divisible
+            by `num_heads`.
         mlp_ratio: float. The ratio of the MLP dimension to the hidden size.
         num_heads: int. The number of attention heads.
         depth: int. The number of double-stream blocks.
         depth_single_blocks: int. The number of single-stream blocks.
-        axes_dim: list[int]. A list of dimensions for the positional embedding
+        axes_dim: list[int]. A list of dimensions for the positional embedding
+            axes.
         theta: int. The base frequency for positional embeddings.
-        use_bias: bool. Whether to apply bias to the query, key, and value
+        use_bias: bool. Whether to apply bias to the query, key, and value
+            projections.
         guidance_embed: bool. If True, applies guidance embedding in the model.
 
     Call arguments:
-        image: KerasTensor. Image input tensor of shape (N, L, D) where N is the
-
-
-
+        image: KerasTensor. Image input tensor of shape (N, L, D) where N is the
+            batch size, L is the sequence length, and D is the feature
+            dimension.
+        image_ids: KerasTensor. Image ID input tensor of shape (N, L, D)
+            corresponding to the image sequences.
         text: KerasTensor. Text input tensor of shape (N, L, D).
-        text_ids: KerasTensor. Text ID input tensor of shape (N, L, D)
-            to the text sequences.
-        timesteps: KerasTensor. Timestep tensor used to compute positional
+        text_ids: KerasTensor. Text ID input tensor of shape (N, L, D)
+            corresponding to the text sequences.
+        timesteps: KerasTensor. Timestep tensor used to compute positional
+            embeddings.
         y: KerasTensor. Additional vector input, such as target values.
         guidance: KerasTensor, optional. Guidance input tensor used
             in guidance-embedded models.
     Raises:
-        ValueError: If `hidden_size` is not divisible by `num_heads`, or if
-
+        ValueError: If `hidden_size` is not divisible by `num_heads`, or if
+            `sum(axes_dim)` is not equal to the positional embedding dimension.
     """
 
     def __init__(
@@ -69,7 +75,6 @@ class FluxBackbone(Backbone):
         y_shape=(None, 128),
         **kwargs,
     ):
-
         # === Layers ===
         self.positional_embedder = EmbedND(theta=theta, axes_dim=axes_dim)
         self.image_input_embedder = keras.layers.Dense(
keras_hub/src/models/flux/flux_presets.py
@@ -4,7 +4,8 @@ presets = {
     "schnell": {
         "metadata": {
             "description": (
-                "A 12 billion parameter rectified flow transformer capable of generating images from text descriptions."
+                "A 12 billion parameter rectified flow transformer capable of "
+                "generating images from text descriptions."
             ),
             "params": 124439808,
             "path": "flux",
keras_hub/src/models/flux/flux_text_to_image.py
@@ -24,11 +24,15 @@ class FluxTextToImage(TextToImage):
 
     Use `generate()` to do image generation.
     ```python
+    prompt = (
+        "Astronaut in a jungle, cold color palette, muted colors, "
+        "detailed, 8k"
+    )
     text_to_image = keras_hub.models.FluxTextToImage.from_preset(
         "TBA", height=512, width=512
     )
     text_to_image.generate(
-        "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
+        prompt
     )
 
     # Generate with batched prompts.
@@ -38,7 +42,7 @@ class FluxTextToImage(TextToImage):
 
     # Generate with different `num_steps` and `guidance_scale`.
     text_to_image.generate(
-        "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
+        prompt,
         num_steps=50,
         guidance_scale=5.0,
     )
@@ -46,7 +50,7 @@ class FluxTextToImage(TextToImage):
     # Generate with `negative_prompts`.
     text_to_image.generate(
         {
-            "prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
+            "prompts": prompt,
             "negative_prompts": "green color",
         }
     )
keras_hub/src/models/gemma/gemma_backbone.py
@@ -44,10 +44,10 @@ class GemmaBackbone(Backbone):
             `hidden_dim / num_query_heads`. Defaults to True.
         use_post_ffw_norm: boolean. Whether to normalize after the feedforward
             block. Defaults to False.
-        use_post_attention_norm: boolean. Whether to normalize after the
-            block. Defaults to False.
-        attention_logit_soft_cap: None or int. Soft cap for the attention
-            Defaults to None.
+        use_post_attention_norm: boolean. Whether to normalize after the
+            attention block. Defaults to False.
+        attention_logit_soft_cap: None or int. Soft cap for the attention
+            logits. Defaults to None.
         final_logit_soft_cap: None or int. Soft cap for the final logits.
             Defaults to None.
         use_sliding_window_attention boolean. Whether to use sliding local
@@ -205,7 +205,9 @@ class GemmaBackbone(Backbone):
                 "final_logit_soft_cap": self.final_logit_soft_cap,
                 "attention_logit_soft_cap": self.attention_logit_soft_cap,
                 "sliding_window_size": self.sliding_window_size,
-                "use_sliding_window_attention": self.use_sliding_window_attention,
+                "use_sliding_window_attention": (
+                    self.use_sliding_window_attention
+                ),
             }
         )
         return config
@@ -224,7 +226,8 @@ class GemmaBackbone(Backbone):
 
         Example:
         ```
-        # Feel free to change the mesh shape to balance data and model parallelism
+        # Feel free to change the mesh shape to balance data and model
+        # parallelism
         mesh = keras.distribution.DeviceMesh(
             shape=(1, 8), axis_names=('batch', 'model'),
             devices=keras.distribution.list_devices())
@@ -237,12 +240,16 @@ class GemmaBackbone(Backbone):
         gemma_model = keras_hub.models.GemmaCausalLM.from_preset()
         ```
 
-        To see how the layout map was applied, load the model then run (for one decoder block):
+        To see how the layout map was applied, load the model then run (for one
+        decoder block):
         ```
         embedding_layer = gemma_model.backbone.get_layer("token_embedding")
         decoder_block_1 = gemma_model.backbone.get_layer('decoder_block_1')
         for variable in embedding_layer.weights + decoder_block_1.weights:
-            print(f'{variable.path:<58} {str(variable.shape):<16} {str(variable.value.sharding.spec)}')
+            print(
+                f'{variable.path:<58} {str(variable.shape):<16} '
+                f'{str(variable.value.sharding.spec)}'
+            )
         ```
 
         Args:
@@ -257,22 +264,22 @@ class GemmaBackbone(Backbone):
                 for all the model weights.
         """
         # The weight path and shape of the Gemma backbone is like below (for 2G)
-        # token_embedding/embeddings, (256128, 2048)
+        # token_embedding/embeddings, (256128, 2048)
         # repeat block for decoder
         # ...
-        # decoder_block_17/pre_attention_norm/scale, (2048,)
-        # decoder_block_17/attention/query/kernel, (8, 2048, 256)
-        # decoder_block_17/attention/key/kernel, (8, 2048, 256)
-        # decoder_block_17/attention/value/kernel, (8, 2048, 256)
-        # decoder_block_17/attention/attention_output/kernel, (8, 256, 2048)
-        # decoder_block_17/pre_ffw_norm/scale, (2048,)
-        # decoder_block_17/ffw_gating/kernel, (2048, 16384)
-        # decoder_block_17/ffw_gating_2/kernel, (2048, 16384)
-        # decoder_block_17/ffw_linear/kernel, (16384, 2048)
+        # decoder_block_17/pre_attention_norm/scale, (2048,)
+        # decoder_block_17/attention/query/kernel, (8, 2048, 256)
+        # decoder_block_17/attention/key/kernel, (8, 2048, 256)
+        # decoder_block_17/attention/value/kernel, (8, 2048, 256)
+        # decoder_block_17/attention/attention_output/kernel, (8, 256, 2048)
+        # decoder_block_17/pre_ffw_norm/scale, (2048,)
+        # decoder_block_17/ffw_gating/kernel, (2048, 16384)
+        # decoder_block_17/ffw_gating_2/kernel, (2048, 16384)
+        # decoder_block_17/ffw_linear/kernel, (16384, 2048)
         if not isinstance(device_mesh, keras.distribution.DeviceMesh):
             raise ValueError(
-                "Invalid device_mesh type. Expected `keras.distribution.Device`,"
-                f" got {type(device_mesh)}"
+                "Invalid device_mesh type. Expected "
+                f"`keras.distribution.Device`, got {type(device_mesh)}"
             )
         if model_parallel_dim_name not in device_mesh.axis_names:
             raise ValueError(
keras_hub/src/models/gemma/gemma_causal_lm.py
@@ -187,8 +187,8 @@ class GemmaCausalLM(CausalLM):
         Args:
             token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
             cache: a dense float Tensor, the cache of key and value.
-            cache_update_index: int, or int Tensor. The index of current inputs
-                whole sequence.
+            cache_update_index: int, or int Tensor. The index of current inputs
+                in the whole sequence.
 
         Returns:
             A (logits, hidden_states, cache) tuple. Where `logits` is the
keras_hub/src/models/gemma/gemma_decoder_block.py
@@ -220,7 +220,9 @@ class GemmaDecoderBlock(keras.layers.Layer):
                 "use_post_ffw_norm": self.use_post_ffw_norm,
                 "use_post_attention_norm": self.use_post_attention_norm,
                 "logit_soft_cap": self.logit_soft_cap,
-                "use_sliding_window_attention": self.use_sliding_window_attention,
+                "use_sliding_window_attention": (
+                    self.use_sliding_window_attention
+                ),
                 "sliding_window_size": self.sliding_window_size,
                 "query_head_dim_normalize": self.query_head_dim_normalize,
             }
keras_hub/src/models/gemma/gemma_presets.py
@@ -130,7 +130,9 @@ backbone_presets = {
     },
     "gemma2_instruct_2b_en": {
         "metadata": {
-            "description": "2 billion parameter, 26-layer, instruction tuned Gemma model.",
+            "description": (
+                "2 billion parameter, 26-layer, instruction tuned Gemma model."
+            ),
             "params": 2614341888,
             "path": "gemma",
         },
@@ -146,7 +148,9 @@
     },
     "gemma2_instruct_9b_en": {
         "metadata": {
-            "description": "9 billion parameter, 42-layer, instruction tuned Gemma model.",
+            "description": (
+                "9 billion parameter, 42-layer, instruction tuned Gemma model."
+            ),
             "params": 9241705984,
             "path": "gemma",
         },
@@ -162,7 +166,9 @@
     },
     "gemma2_instruct_27b_en": {
         "metadata": {
-            "description": "27 billion parameter, 42-layer, instruction tuned Gemma model.",
+            "description": (
+                "27 billion parameter, 42-layer, instruction tuned Gemma model."
+            ),
             "params": 27227128320,
             "path": "gemma",
         },
keras_hub/src/models/gpt2/gpt2_causal_lm.py
@@ -172,8 +172,8 @@ class GPT2CausalLM(CausalLM):
         Args:
             token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
             cache: a dense float Tensor, the cache of key and value.
-            cache_update_index: int, or int Tensor. The index of current inputs
-                whole sequence.
+            cache_update_index: int, or int Tensor. The index of current inputs
+                in the whole sequence.
 
         Returns:
             A (logits, hidden_states, cache) tuple. Where `logits` is the
keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py
@@ -202,7 +202,8 @@ class GPTNeoXAttention(keras.layers.Layer):
             training=training,
         )
 
-        # Reshape `attention_output` to `(batch_size, sequence_length, hidden_dim)`.
+        # Reshape `attention_output` to
+        # `(batch_size, sequence_length, hidden_dim)`.
         attention_output = ops.reshape(
             attention_output,
             [
keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py
@@ -27,9 +27,9 @@ class GPTNeoXCausalLM(CausalLM):
 
     Args:
         backbone: A `keras_hub.models.GPTNeoXBackbone` instance.
-        preprocessor: A `keras_hub.models.GPTNeoXCausalLMPreprocessor` or
-            If `None`, this model will not apply preprocessing, and
-            should be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.GPTNeoXCausalLMPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
     """
 
     backbone_cls = GPTNeoXBackbone
keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py
@@ -16,7 +16,8 @@ class GPTNeoXDecoder(keras.layers.Layer):
 
     This class follows the architecture of the GPT-NeoX decoder layer in the
     paper [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745).
-    Users can instantiate multiple instances of this class to stack up a
+    Users can instantiate multiple instances of this class to stack up a
+    decoder.
 
     This layer will always apply a causal mask to the decoder attention layer.
keras_hub/src/models/image_classifier_preprocessor.py
@@ -46,7 +46,10 @@ class ImageClassifierPreprocessor(Preprocessor):
     x, y = preprocessor(x, y)
 
     # Resize a batch of labeled images.
-    x, y = [np.random.randint(0, 256, (512, 512, 3)), np.zeros((512, 512, 3))], [1, 0]
+    x, y = [
+        np.random.randint(0, 256, (512, 512, 3)),
+        np.zeros((512, 512, 3))
+    ], [1, 0]
     x, y = preprocessor(x, y)
 
     # Use a `tf.data.Dataset`.
keras_hub/src/models/image_object_detector.py
@@ -31,8 +31,8 @@ class ImageObjectDetector(Task):
     ):
         """Configures the `ImageObjectDetector` task for training.
 
-        The `ImageObjectDetector` task extends the default compilation signature
-        `keras.Model.compile` with defaults for `optimizer`, `loss`, and
+        The `ImageObjectDetector` task extends the default compilation signature
+        of `keras.Model.compile` with defaults for `optimizer`, `loss`, and
         `metrics`. To override these defaults, pass any value
         to these arguments during compilation.
 
keras_hub/src/models/image_object_detector_preprocessor.py
@@ -21,10 +21,10 @@ class ImageObjectDetectorPreprocessor(Preprocessor):
         be the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4),
         "classes": (batch_size, num_boxes)}.
 
-    The layer will returns either `x`, an `(x, y)` tuple if labels were
-    or an `(x, y, sample_weight)` tuple if labels and sample weight
-    provided. `x` will be the input images after all model preprocessing
-    been applied.
+    The layer will returns either `x`, an `(x, y)` tuple if labels were
+    provided, or an `(x, y, sample_weight)` tuple if labels and sample weight
+    were provided. `x` will be the input images after all model preprocessing
+    has been applied.
 
     All `ImageObjectDetectorPreprocessor` tasks include a `from_preset()`
     constructor which can be used to load a pre-trained config and vocabularies.