keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +1 -0
- keras_hub/api/models/__init__.py +11 -6
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/converters.py +2 -2
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/rms_normalization.py +8 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
- keras_hub/src/metrics/bleu.py +1 -1
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/bert/bert_presets.py +4 -2
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/causal_lm.py +19 -15
- keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
- keras_hub/src/models/densenet/densenet_backbone.py +3 -1
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +6 -6
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/cba.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
- keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
- keras_hub/src/models/efficientnet/mbconv.py +1 -1
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/flux/flux_layers.py +46 -44
- keras_hub/src/models/flux/flux_maths.py +24 -17
- keras_hub/src/models/flux/flux_model.py +24 -19
- keras_hub/src/models/flux/flux_presets.py +2 -1
- keras_hub/src/models/flux/flux_text_to_image.py +7 -3
- keras_hub/src/models/gemma/gemma_backbone.py +27 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +9 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier_preprocessor.py +4 -1
- keras_hub/src/models/image_object_detector.py +2 -2
- keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
- keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
- keras_hub/src/models/llama/llama_backbone.py +34 -26
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/mit_backbone.py +4 -3
- keras_hub/src/models/mit/mit_layers.py +2 -1
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +2 -2
- keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
- keras_hub/src/models/retinanet/prediction_head.py +2 -2
- keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
- keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
- keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +4 -2
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/sam_backbone.py +2 -2
- keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/segformer_backbone.py +18 -14
- keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
- keras_hub/src/models/segformer/segformer_presets.py +24 -12
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/task.py +4 -2
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +5 -1
- keras_hub/src/models/vae/vae_layers.py +0 -1
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +49 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +2 -2
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
- keras_hub/src/utils/preset_utils.py +25 -18
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py
CHANGED
@@ -3,8 +3,7 @@ from keras import ops


 class ContentAndQueryEmbedding(keras.layers.Layer):
-    """
-    Content and Query Embedding.
+    """Content and Query Embedding.

     This class creates Content and Query Embeddings for XLNet model
     which is later used in XLNet Encoder.
@@ -20,9 +19,8 @@ class ContentAndQueryEmbedding(keras.layers.Layer):
         **kwargs: other keyword arguments.

     References:
-     - [XLNet: Generalized Autoregressive Pretraining for Language Understanding]
-       (https://arxiv.org/abs/1906.08237)
-    """
+     - [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237)
+    """  # noqa: E501

     def __init__(
         self, vocabulary_size, hidden_dim, dropout, name=None, **kwargs
keras_hub/src/models/xlnet/xlnet_encoder.py
CHANGED
@@ -11,17 +11,16 @@ def xlnet_kernel_initializer(stddev=0.02):


 class XLNetEncoder(keras.layers.Layer):
-    """
-    XLNet Encoder.
+    """XLNet Encoder.

     This class follows the architecture of the transformer encoder layer in the
     paper [Attention is All You Need](https://arxiv.org/abs/1706.03762). Users
     can instantiate multiple instances of this class to stack up an encoder.

     Contrary to the single hidden state used in the paper mentioned above, this
-    Encoder uses two hidden states, Content State and Query State. Thus
-    Two Stream Relative Attention using both of the hidden states.
-    please check the reference.
+    Encoder uses two hidden states, Content State and Query State. Thus
+    calculates Two Stream Relative Attention using both of the hidden states.
+    To know more please check the reference.

     Args:
         num_heads: int, the number of heads in the
@@ -44,9 +43,8 @@ class XLNetEncoder(keras.layers.Layer):
         **kwargs: other keyword arguments.

     References:
-     - [XLNet: Generalized Autoregressive Pretraining for Language Understanding]
-       (https://arxiv.org/abs/1906.08237)
-    """
+     - [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237)
+    """  # noqa: E501

     def __init__(
         self,
@@ -60,7 +58,7 @@ class XLNetEncoder(keras.layers.Layer):
         kernel_initializer_range=0.02,
         bias_initializer="zeros",
         name=None,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(name=name, **kwargs)
         self.num_heads = num_heads
keras_hub/src/samplers/contrastive_sampler.py
CHANGED
@@ -150,9 +150,8 @@ class ContrastiveSampler(Sampler):
             # The final score of each candidate token is weighted sum of
             # probability and similarity against previous tokens.
             accumulated_scores = (
-                (1 - self.alpha) * next_token_probabilities
-                - self.alpha * max_similarity_scores
-            )
+                1 - self.alpha
+            ) * next_token_probabilities - self.alpha * max_similarity_scores
             # Unflatten variables to shape [batch_size, self.k, ...] for
             # gather purpose.
             unflat_score = unflatten_beams(accumulated_scores)
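For context, the rewrapped expression is the contrastive-search score: token probability weighted against a degeneration (similarity) penalty. A minimal NumPy sketch of the same arithmetic (the candidate values are invented for illustration):

```
import numpy as np

alpha = 0.6  # penalty strength, as in ContrastiveSampler(alpha=...)
# Hypothetical scores for k=3 candidate next tokens of one sequence.
next_token_probabilities = np.array([0.50, 0.30, 0.20])
max_similarity_scores = np.array([0.90, 0.20, 0.40])  # max sim. vs. history

# Same formula as the diff above.
accumulated_scores = (
    1 - alpha
) * next_token_probabilities - alpha * max_similarity_scores
print(accumulated_scores)  # highest score wins: here the second candidate
```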
keras_hub/src/samplers/sampler.py
CHANGED
@@ -95,7 +95,8 @@ class Sampler:
         def cond(prompt, cache, index):
             if stop_token_ids is None:
                 return True
-            # Stop if all sequences have produced a *new* id from stop_token_ids.
+            # Stop if all sequences have produced a *new* id from
+            # stop_token_ids.
             end_tokens = any_equal(prompt, stop_token_ids, ~mask)
             prompt_done = ops.any(end_tokens, axis=-1)
             return ops.logical_not(ops.all(prompt_done))
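The comment describes the early-exit condition: generation stops only once every sequence has emitted a stop token at a *generated* (non-prompt) position. A rough NumPy rendering of that logic (shapes and values are invented):

```
import numpy as np

stop_token_ids = (2,)
# Two sequences of length 5; `mask` marks the original prompt positions.
prompt = np.array([[5, 7, 2, 0, 0],
                   [5, 9, 8, 3, 0]])
mask = np.array([[True, True, False, False, False],
                 [True, True, False, False, False]])

end_tokens = np.isin(prompt, stop_token_ids) & ~mask  # new stop ids only
prompt_done = end_tokens.any(axis=-1)  # per-sequence done flag
print(not prompt_done.all())  # True: sequence 2 is unfinished, keep looping
```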
keras_hub/src/tests/test_case.py
CHANGED
@@ -458,8 +458,8 @@ class TestCase(tf.test.TestCase, parameterized.TestCase):

         # Check variable length sequences.
         if variable_length_data is None:
-            # If no variable length data passed, assume the second axis of
-            # inputs is our sequence axis and create it ourselves.
+            # If no variable length data passed, assume the second axis of
+            # all inputs is our sequence axis and create it ourselves.
             variable_length_data = [
                 tree.map_structure(
                     lambda x: x[:, :seq_length, ...], input_data
keras_hub/src/tokenizers/byte_pair_tokenizer.py
CHANGED
@@ -200,8 +200,8 @@ class BytePairTokenizer(tokenizer.Tokenizer):
     """Bype-pair encoding tokenizer layer.

     This BPE tokenizer provides the same functionality as the official GPT-2
-    tokenizer. Given the same `vocabulary` which maps tokens to ids, and `merges`
-    which describes BPE merge rules, it should provide the same output
+    tokenizer. Given the same `vocabulary` which maps tokens to ids, and
+    `merges` which describes BPE merge rules, it should provide the same output
     as OpenAI implementation (https://github.com/openai/gpt-2/blob/master/src/encoder.py).
     Different from OpenAI, this implementation is graph-compatible, so you can
     use it within a `tf.data` pipeline.
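The graph-compatibility claim means the tokenizer can be mapped directly over a `tf.data` pipeline. A hedged sketch with a toy vocabulary and merge list (the two-symbol-per-entry merge format assumed here follows keras_hub's BPE docs, not this diff):

```
import tensorflow as tf
import keras_hub

vocab = {"butter": 1, "fly": 2}
merges = ["b u", "t t", "e r", "bu tt", "butt er", "f l", "fl y"]
tokenizer = keras_hub.tokenizers.BytePairTokenizer(
    vocabulary=vocab, merges=merges
)

# Graph-compatible: runs inside tf.data without eager execution.
ds = tf.data.Dataset.from_tensor_slices(["butterfly"]).map(tokenizer)
print(next(iter(ds)))  # expected: ids [1, 2]
```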
keras_hub/src/tokenizers/byte_tokenizer.py
CHANGED
@@ -1,13 +1,5 @@
 import numpy as np

-try:
-    import tensorflow as tf
-except ImportError:
-    raise ImportError(
-        "To use `keras_hub`, please install Tensorflow: `pip install tensorflow`. "
-        "The TensorFlow package is required for data preprocessing with any backend."
-    )
-
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.tokenizers import tokenizer
 from keras_hub.src.utils.tensor_utils import convert_to_ragged_batch
@@ -15,8 +7,10 @@ from keras_hub.src.utils.tensor_utils import is_int_dtype
 from keras_hub.src.utils.tensor_utils import preprocessing_function

 try:
+    import tensorflow as tf
     import tensorflow_text as tf_text
 except ImportError:
+    tf = None
     tf_text = None

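This release replaces the hard TensorFlow import (and its eager ImportError) with the lazy pattern above, and the same change repeats in the sentence-piece modules below: the module-level name falls back to `None`, and TensorFlow is only required when a code path actually touches it. A generic sketch of the pattern (the function and message are illustrative, not keras-hub's exact code):

```
try:
    import tensorflow as tf  # optional dependency
except ImportError:
    tf = None  # deferred: only an error if a tf-backed path is actually used


def decode_utf8(inputs):
    if tf is None:
        raise ImportError(
            "This function requires TensorFlow. "
            "Install it with `pip install tensorflow`."
        )
    return tf.strings.unicode_decode(inputs, "UTF-8")
```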
keras_hub/src/tokenizers/sentence_piece_tokenizer.py
CHANGED
@@ -4,14 +4,6 @@ import os

 import keras

-try:
-    import tensorflow as tf
-except ImportError:
-    raise ImportError(
-        "To use `keras_hub`, please install Tensorflow: `pip install tensorflow`. "
-        "The TensorFlow package is required for data preprocessing with any backend."
-    )
-
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.tokenizers import tokenizer
 from keras_hub.src.utils.tensor_utils import convert_to_ragged_batch
@@ -21,11 +13,12 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function
 from keras_hub.src.utils.tensor_utils import tensor_to_list

 try:
+    import tensorflow as tf
     import tensorflow_text as tf_text
 except ImportError:
+    tf = None
     tf_text = None

-
 VOCAB_FILENAME = "vocabulary.spm"

|
@@ -1,17 +1,11 @@
|
|
1
1
|
import io
|
2
2
|
|
3
|
-
try:
|
4
|
-
import tensorflow as tf
|
5
|
-
except ImportError:
|
6
|
-
raise ImportError(
|
7
|
-
"To use `keras_hub`, please install Tensorflow: `pip install tensorflow`. "
|
8
|
-
"The TensorFlow package is required for data preprocessing with any backend."
|
9
|
-
)
|
10
|
-
|
11
3
|
try:
|
12
4
|
import sentencepiece as spm
|
5
|
+
import tensorflow as tf
|
13
6
|
except ImportError:
|
14
7
|
spm = None
|
8
|
+
tf = None
|
15
9
|
|
16
10
|
from keras_hub.src.api_export import keras_hub_export
|
17
11
|
|
@@ -52,7 +46,8 @@ def compute_sentence_piece_proto(
|
|
52
46
|
|
53
47
|
Basic Usage (from Dataset).
|
54
48
|
>>> inputs = tf.data.Dataset.from_tensor_slices(["Drifting Along"])
|
55
|
-
>>> proto = keras_hub.tokenizers.compute_sentence_piece_proto(
|
49
|
+
>>> proto = keras_hub.tokenizers.compute_sentence_piece_proto(
|
50
|
+
... inputs, vocabulary_size=15)
|
56
51
|
>>> tokenizer = keras_hub.tokenizers.SentencePieceTokenizer(proto=proto)
|
57
52
|
>>> outputs = inputs.map(tokenizer)
|
58
53
|
>>> for output in outputs:
|
@@ -92,7 +87,8 @@ def compute_sentence_piece_proto(
|
|
92
87
|
|
93
88
|
if not isinstance(data, (list, tuple, tf.data.Dataset)):
|
94
89
|
raise ValueError(
|
95
|
-
"The `data` argument must be either `tf.data.Dataset` or
|
90
|
+
"The `data` argument must be either `tf.data.Dataset` or "
|
91
|
+
"`tuple` or `list`. "
|
96
92
|
f"Received: type(data)={type(data)}."
|
97
93
|
)
|
98
94
|
|
@@ -105,8 +101,7 @@ def compute_sentence_piece_proto(
|
|
105
101
|
model_writer = (
|
106
102
|
open(proto_output_file, "wb") if proto_output_file else io.BytesIO()
|
107
103
|
)
|
108
|
-
|
109
|
-
if is_dataset:
|
104
|
+
if tf is not None and isinstance(data, tf.data.Dataset):
|
110
105
|
spm.SentencePieceTrainer.train(
|
111
106
|
sentence_iterator=data.as_numpy_iterator(),
|
112
107
|
model_writer=model_writer,
|
@@ -226,8 +226,9 @@ class UnicodeCodepointTokenizer(tokenizer.Tokenizer):
|
|
226
226
|
if normalization_form:
|
227
227
|
if input_encoding != "UTF-8":
|
228
228
|
raise ValueError(
|
229
|
-
"
|
230
|
-
|
229
|
+
"Normalization Forms are Only Supported for Input "
|
230
|
+
"Encoding UTF-8"
|
231
|
+
""
|
231
232
|
)
|
232
233
|
|
233
234
|
super().__init__(dtype=dtype, **kwargs)
|
@@ -259,8 +260,9 @@ class UnicodeCodepointTokenizer(tokenizer.Tokenizer):
|
|
259
260
|
return config
|
260
261
|
|
261
262
|
def vocabulary_size(self):
|
262
|
-
"""Get the size of the tokenizer vocabulary.
|
263
|
-
|
263
|
+
"""Get the size of the tokenizer vocabulary.
|
264
|
+
|
265
|
+
None implies no vocabulary size was provided"""
|
264
266
|
return self._vocabulary_size
|
265
267
|
|
266
268
|
def get_vocabulary(self):
|
@@ -334,6 +336,7 @@ class UnicodeCodepointTokenizer(tokenizer.Tokenizer):
|
|
334
336
|
id = ord(token)
|
335
337
|
if id >= self.vocabulary_size():
|
336
338
|
raise ValueError(
|
337
|
-
f"Token {token} is not supported by
|
339
|
+
f"Token {token} is not supported by "
|
340
|
+
"`UnicodeCodepointTokenizer`."
|
338
341
|
)
|
339
342
|
return id
|
keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py
CHANGED
@@ -55,7 +55,8 @@ def compute_word_piece_vocabulary(
         suffix_indicator: str. The characters prepended to a
             WordPiece to indicate that it is a suffix to another subword.
             E.g. `"##ing"`. Defaults to `"##"`.
-        reserved_tokens: list of strings. A list of tokens that must be included in the vocabulary.
+        reserved_tokens: list of strings. A list of tokens that must be included
+            in the vocabulary.

     Returns:
         Returns a list of vocabulary terms.
@@ -67,7 +68,10 @@ def compute_word_piece_vocabulary(
     >>> vocab = compute_word_piece_vocabulary(inputs, 13)
     >>> vocab
     ['[PAD]', '[CLS]', '[SEP]', '[UNK]', '[MASK]', 'a', 'b', 'm', 'p', 'r', 's', 't', '##at']
-    >>> tokenizer = keras_hub.tokenizers.WordPieceTokenizer(vocabulary=vocab, oov_token="[UNK]")
+    >>> tokenizer = keras_hub.tokenizers.WordPieceTokenizer(
+    ...     vocabulary=vocab,
+    ...     oov_token="[UNK]",
+    ... )
     >>> outputs = inputs.map(tokenizer.tokenize)
     >>> for x in outputs:
     ...     print(x)
@@ -112,7 +116,7 @@ def compute_word_piece_vocabulary(
     tokenizer = keras_hub.tokenizers.WordPieceTokenizer(vocabulary=vocab)
     inputs.map(tokenizer.tokenize)
     ```
-    """
+    """  # noqa: E501
     # Read data files.
     if not isinstance(data, (list, tf.data.Dataset)):
         raise ValueError(
keras_hub/src/utils/preset_utils.py
CHANGED
@@ -16,8 +16,9 @@ try:
     import tensorflow as tf
 except ImportError:
     raise ImportError(
-        "To use `keras_hub`, please install Tensorflow: `pip install tensorflow`. "
-        "The TensorFlow package is required for data preprocessing with any backend."
+        "To use `keras_hub`, please install Tensorflow: "
+        "`pip install tensorflow`. The TensorFlow package is required for data "
+        "preprocessing with any backend."
     )

 try:
@@ -191,7 +192,8 @@ def get_file(preset, path):
     elif scheme == HF_SCHEME:
         if huggingface_hub is None:
             raise ImportError(
-                f"`from_preset()` requires the `huggingface_hub` package to load from '{preset}'. "
+                "`from_preset()` requires the `huggingface_hub` package to "
+                "load from '{preset}'. "
                 "Please install with `pip install huggingface_hub`."
             )
         hf_handle = preset.removeprefix(HF_SCHEME + "://")
@@ -225,7 +227,8 @@ def get_file(preset, path):
         raise ValueError(
             "Unknown preset identifier. A preset must be a one of:\n"
             "1) a built-in preset identifier like `'bert_base_en'`\n"
-            "2) a Kaggle Models handle like `'kaggle://keras/bert/keras/bert_base_en'`\n"
+            "2) a Kaggle Models handle like "
+            "`'kaggle://keras/bert/keras/bert_base_en'`\n"
             "3) a Hugging Face handle like `'hf://username/bert_base_en'`\n"
             "4) a path to a local preset directory like `'./bert_base_en`\n"
             "Use `print(cls.presets.keys())` to view all built-in presets for "
@@ -342,8 +345,8 @@ def create_model_card(preset):
         markdown_content += f"* **{k}:** {v}\n"
     markdown_content += "\n"
     markdown_content += (
-        "This model card has been generated automatically and should be completed "
-        "by the model author. See [Model Cards documentation]"
+        "This model card has been generated automatically and should be "
+        "completed by the model author. See [Model Cards documentation]"
         "(https://huggingface.co/docs/hub/model-cards) for more information.\n"
     )

@@ -388,20 +391,22 @@ upload_preset(
     if uri.startswith(KAGGLE_PREFIX):
         if kagglehub is None:
             raise ImportError(
-                "Uploading a model to Kaggle Hub requires the `kagglehub` package. "
-                "Please install with `pip install kagglehub`."
+                "Uploading a model to Kaggle Hub requires the `kagglehub` "
+                "package. Please install with `pip install kagglehub`."
             )
         if parse(kagglehub.__version__) < parse("0.2.4"):
             raise ImportError(
-                "Uploading a model to Kaggle Hub requires the `kagglehub` package version `0.2.4` or higher. "
-                "Please upgrade with `pip install --upgrade kagglehub`."
+                "Uploading a model to Kaggle Hub requires the `kagglehub` "
+                "package version `0.2.4` or higher. Please upgrade with "
+                "`pip install --upgrade kagglehub`."
             )
         kaggle_handle = uri.removeprefix(KAGGLE_PREFIX)
         kagglehub.model_upload(kaggle_handle, preset)
     elif uri.startswith(HF_PREFIX):
         if huggingface_hub is None:
             raise ImportError(
-                f"`upload_preset()` requires the `huggingface_hub` package to upload to '{uri}'. "
+                f"`upload_preset()` requires the `huggingface_hub` package "
+                f"to upload to '{uri}'. "
                 "Please install with `pip install huggingface_hub`."
             )
         hf_handle = uri.removeprefix(HF_PREFIX)
@@ -413,14 +418,15 @@ upload_preset(
             raise ValueError(
                 "Unexpected Hugging Face URI. Hugging Face model handles "
                 "should have the form 'hf://[{org}/]{model}'. For example, "
-                "'hf://username/bert_base_en' or 'hf://bert_case_en' to "
-                f"upload to your user account. Received: URI={uri}."
+                "'hf://username/bert_base_en' or 'hf://bert_case_en' to "
+                f"implicitly upload to your user account. Received: URI={uri}."
             ) from e
         has_model_card = huggingface_hub.file_exists(
             repo_id=repo_url.repo_id, filename=README_FILE
         )
         if not has_model_card:
-            # Remote repo doesn't have a model card so a basic model card is automatically generated.
+            # Remote repo doesn't have a model card so a basic model card is
+            # automatically generated.
             create_model_card(preset)
         try:
             huggingface_hub.upload_folder(
@@ -428,13 +434,14 @@ upload_preset(
             )
         finally:
             if not has_model_card:
-                # Clean up the preset directory in case user attempts to upload
-                # preset directory into Kaggle hub as well.
+                # Clean up the preset directory in case user attempts to upload
+                # the preset directory into Kaggle hub as well.
                 delete_model_card(preset)
     else:
         raise ValueError(
             "Unknown URI. An URI must be a one of:\n"
-            "1) a Kaggle Model handle like `'kaggle://<KAGGLE_USERNAME>/<MODEL>/<FRAMEWORK>/<VARIATION>'`\n"
+            "1) a Kaggle Model handle like "
+            "`'kaggle://<KAGGLE_USERNAME>/<MODEL>/<FRAMEWORK>/<VARIATION>'`\n"
             "2) a Hugging Face handle like `'hf://[<HF_USERNAME>/]<MODEL>'`\n"
             f"Received: uri='{uri}'."
         )
@@ -778,7 +785,7 @@ class KerasPresetSaver:
         # E.g. for `BertBackbone` we would have `TextClassifier` and `MaskedLM`.
         # For `ResNetBackbone` we would have `ImageClassifier`.
         tasks = list_subclasses(Task)
-        tasks = filter(lambda x: x.backbone_cls == type(layer), tasks)
+        tasks = filter(lambda x: x.backbone_cls is type(layer), tasks)
        tasks = [task.__base__.__name__ for task in tasks]

        keras_version = keras.version() if hasattr(keras, "version") else None
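Taken together, the reworked messages in `upload_preset` spell out its two accepted URI schemes. A hedged usage sketch (handles and the local path are placeholders, left as placeholders):

```
import keras_hub

# Save a model as a local preset directory first.
model = keras_hub.models.TextClassifier.from_preset(
    "bert_base_en", num_classes=2
)
model.save_to_preset("./bert_base_en_custom")

# Upload using either handle form from the error message above.
keras_hub.upload_preset(
    "kaggle://<KAGGLE_USERNAME>/bert/keras/bert_base_en_custom",
    "./bert_base_en_custom",
)
keras_hub.upload_preset(
    "hf://<HF_USERNAME>/bert_base_en_custom",
    "./bert_base_en_custom",
)
```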
keras_hub/src/utils/tensor_utils.py
CHANGED
@@ -293,10 +293,10 @@ def any_equal(inputs, values, padding_mask):

     Args:
         inputs: Input tensor.
-        values: List or iterable of tensors shaped like `inputs` or broadcastable
-            by bit operators.
-        padding_mask: Tensor with shape compatible with inputs that will condition
-            output.
+        values: List or iterable of tensors shaped like `inputs` or
+            broadcastable by bit operators.
+        padding_mask: Tensor with shape compatible with inputs that will
+            condition output.

     Returns:
         A tensor with `inputs` shape where each position is True if it contains
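From the docstring, `any_equal` marks positions equal to any of `values`, gated by `padding_mask`. A rough NumPy equivalent for intuition (not keras-hub's implementation, which works on backend tensors):

```
import numpy as np

def any_equal_np(inputs, values, padding_mask):
    # True wherever `inputs` matches any value, restricted by the mask.
    output = np.zeros_like(inputs, dtype=bool)
    for value in values:
        output |= inputs == value
    return output & padding_mask

inputs = np.array([[1, 2, 3], [3, 2, 1]])
mask = np.array([[True, True, False], [True, True, True]])
print(any_equal_np(inputs, values=(2, 3), padding_mask=mask))
# [[False  True False]
#  [ True  True False]]
```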
keras_hub/src/utils/timm/convert_efficientnet.py
CHANGED
@@ -198,10 +198,10 @@ def convert_weights(backbone, loader, timm_config):
         port_bias=True,
         depth_multiplier=1,
     ):
-
         def convert_pt_conv2d_kernel(pt_kernel):
             out_channels, in_channels_per_group, height, width = pt_kernel.shape
-            # PT Convs are depthwise convs if and only if `in_channels_per_group == 1`
+            # PT Convs are depthwise convs if and only if
+            # `in_channels_per_group == 1`
             assert in_channels_per_group == 1
             pt_kernel = np.transpose(pt_kernel, (2, 3, 0, 1))
             in_channels = out_channels // depth_multiplier
@@ -248,7 +248,6 @@ def convert_weights(backbone, loader, timm_config):
     num_stacks = len(backbone.stackwise_kernel_sizes)

     for stack_index in range(num_stacks):
-
         block_type = backbone.stackwise_block_types[stack_index]
         expansion_ratio = backbone.stackwise_expansion_ratios[stack_index]
         repeats = backbone.stackwise_num_repeats[stack_index]
@@ -263,7 +262,6 @@ def convert_weights(backbone, loader, timm_config):
     ]

     for block_idx in range(repeats):
-
         conv_pw_count = 0
         bn_count = 1
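The `(2, 3, 0, 1)` transpose in `convert_pt_conv2d_kernel` moves a PyTorch depthwise kernel of shape `(out_channels, in_channels_per_group, height, width)` into a height/width-leading layout. A quick shape check (array contents are arbitrary; the final reshape is an assumption about how the converter reaches Keras' `(height, width, in_channels, depth_multiplier)` depthwise layout):

```
import numpy as np

depth_multiplier = 1
pt_kernel = np.zeros((32, 1, 3, 3))  # (out_channels, in_channels_per_group, h, w)
out_channels, in_channels_per_group, height, width = pt_kernel.shape
assert in_channels_per_group == 1  # depthwise iff one input channel per group

kernel = np.transpose(pt_kernel, (2, 3, 0, 1))  # -> (3, 3, 32, 1)
in_channels = out_channels // depth_multiplier
kernel = np.reshape(kernel, (height, width, in_channels, depth_multiplier))
print(kernel.shape)  # (3, 3, 32, 1): Keras DepthwiseConv2D kernel layout
```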
keras_hub/src/utils/transformers/convert_vit.py
ADDED
@@ -0,0 +1,150 @@
+import numpy as np
+
+from keras_hub.src.models.vit.vit_backbone import ViTBackbone
+
+backbone_cls = ViTBackbone
+
+
+def convert_backbone_config(transformers_config):
+    image_size = transformers_config["image_size"]
+    return {
+        "image_shape": (image_size, image_size, 3),
+        "patch_size": transformers_config["patch_size"],
+        "num_layers": transformers_config["num_hidden_layers"],
+        "num_heads": transformers_config["num_attention_heads"],
+        "hidden_dim": transformers_config["hidden_size"],
+        "mlp_dim": transformers_config["intermediate_size"],
+        "dropout_rate": transformers_config["hidden_dropout_prob"],
+        "attention_dropout": transformers_config[
+            "attention_probs_dropout_prob"
+        ],
+        "use_mha_bias": transformers_config["qkv_bias"],
+    }
+
+
+def convert_weights(backbone, loader, transformers_config):
+    def port_ln(keras_variable, weight_key):
+        loader.port_weight(keras_variable.gamma, f"{weight_key}.weight")
+        loader.port_weight(keras_variable.beta, f"{weight_key}.bias")
+
+    def port_dense(keras_variable, weight_key):
+        loader.port_weight(
+            keras_variable.kernel,
+            f"{weight_key}.weight",
+            hook_fn=lambda x, _: x.T,
+        )
+        if keras_variable.bias is not None:
+            loader.port_weight(keras_variable.bias, f"{weight_key}.bias")
+
+    def port_mha(keras_variable, weight_key, num_heads, hidden_dim):
+        # query
+        loader.port_weight(
+            keras_variable.query_dense.kernel,
+            f"{weight_key}.attention.query.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.query_dense.bias,
+            f"{weight_key}.attention.query.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # key
+        loader.port_weight(
+            keras_variable.key_dense.kernel,
+            f"{weight_key}.attention.key.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.key_dense.bias,
+            f"{weight_key}.attention.key.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # value
+        loader.port_weight(
+            keras_variable.value_dense.kernel,
+            f"{weight_key}.attention.value.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.value_dense.bias,
+            f"{weight_key}.attention.value.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # output
+        loader.port_weight(
+            keras_variable.output_dense.kernel,
+            f"{weight_key}.output.dense.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (num_heads, hidden_dim // num_heads, hidden_dim)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.output_dense.bias, f"{weight_key}.output.dense.bias"
+        )
+
+    loader.port_weight(
+        keras_variable=backbone.layers[1].patch_embedding.kernel,
+        hf_weight_key="vit.embeddings.patch_embeddings.projection.weight",
+        hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)),
+    )
+
+    loader.port_weight(
+        backbone.layers[1].patch_embedding.bias,
+        "vit.embeddings.patch_embeddings.projection.bias",
+    )
+
+    loader.port_weight(
+        backbone.layers[1].class_token,
+        "vit.embeddings.cls_token",
+    )
+
+    loader.port_weight(
+        backbone.layers[1].position_embedding.embeddings,
+        "vit.embeddings.position_embeddings",
+        hook_fn=lambda x, _: x[0],
+    )
+    encoder_layers = backbone.layers[2].encoder_layers
+    for i, encoder_block in enumerate(encoder_layers):
+        prefix = "vit.encoder.layer"
+        num_heads = encoder_block.num_heads
+        hidden_dim = encoder_block.hidden_dim
+
+        port_mha(
+            encoder_block.mha,
+            f"{prefix}.{i}.attention",
+            num_heads,
+            hidden_dim,
+        )
+        port_ln(encoder_block.layer_norm_1, f"{prefix}.{i}.layernorm_before")
+        port_ln(encoder_block.layer_norm_2, f"{prefix}.{i}.layernorm_after")
+
+        port_dense(
+            encoder_block.mlp.dense_1, f"{prefix}.{i}.intermediate.dense"
+        )
+        port_dense(encoder_block.mlp.dense_2, f"{prefix}.{i}.output.dense")
+    port_ln(backbone.layers[2].layer_norm, "vit.layernorm")
+
+
+def convert_head(task, loader, transformers_config):
+    prefix = "classifier."
+    loader.port_weight(
+        task.output_dense.kernel,
+        hf_weight_key=prefix + "weight",
+        hook_fn=lambda x, _: x.T,
+    )
+    loader.port_weight(
+        task.output_dense.bias,
+        hf_weight_key=prefix + "bias",
+    )
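Together with the preset-loader changes below, this converter makes Hugging Face ViT checkpoints loadable through the usual `from_preset` path. A hedged usage sketch (the `hf://` handle is an example checkpoint, not one this diff pins):

```
import keras_hub

# Backbone only: config translated by convert_backbone_config, weights by
# convert_weights.
backbone = keras_hub.models.Backbone.from_preset(
    "hf://google/vit-base-patch16-224"
)

# Full classifier: for "ViTForImageClassification" checkpoints the head is
# ported by convert_head as well (see load_task below).
classifier = keras_hub.models.ImageClassifier.from_preset(
    "hf://google/vit-base-patch16-224"
)
```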
keras_hub/src/utils/transformers/preset_loader.py
CHANGED
@@ -1,5 +1,6 @@
 """Convert huggingface models to KerasHub."""

+from keras_hub.src.models.image_classifier import ImageClassifier
 from keras_hub.src.utils.preset_utils import PresetLoader
 from keras_hub.src.utils.preset_utils import jax_memory_cleanup
 from keras_hub.src.utils.transformers import convert_albert
@@ -11,6 +12,7 @@ from keras_hub.src.utils.transformers import convert_gpt2
 from keras_hub.src.utils.transformers import convert_llama3
 from keras_hub.src.utils.transformers import convert_mistral
 from keras_hub.src.utils.transformers import convert_pali_gemma
+from keras_hub.src.utils.transformers import convert_vit
 from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader


@@ -37,6 +39,8 @@ class TransformersPresetLoader(PresetLoader):
             self.converter = convert_mistral
         elif model_type == "paligemma":
             self.converter = convert_pali_gemma
+        elif model_type == "vit":
+            self.converter = convert_vit
         else:
             raise ValueError(
                 "KerasHub has no converter for huggingface/transformers models "
@@ -55,6 +59,25 @@ class TransformersPresetLoader(PresetLoader):
         self.converter.convert_weights(backbone, loader, self.config)
         return backbone

+    def load_task(self, cls, load_weights, load_task_weights, **kwargs):
+        architecture = self.config["architectures"][0]
+        if (
+            not load_task_weights
+            or not issubclass(cls, ImageClassifier)
+            or architecture == "ViTModel"
+        ):
+            return super().load_task(
+                cls, load_weights, load_task_weights, **kwargs
+            )
+        # Support loading the classification head for classifier models.
+        if architecture == "ViTForImageClassification":
+            kwargs["num_classes"] = len(self.config["id2label"])
+        task = super().load_task(cls, load_weights, load_task_weights, **kwargs)
+        if load_task_weights:
+            with SafetensorLoader(self.preset, prefix="") as loader:
+                self.converter.convert_head(task, loader, self.config)
+        return task
+
     def load_tokenizer(self, cls, config_name="tokenizer.json", **kwargs):
         return self.converter.convert_tokenizer(cls, self.preset, **kwargs)
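One detail worth noting in `load_task`: for "ViTForImageClassification" checkpoints the class count is not a user argument but is inferred from the checkpoint's `id2label` mapping. A minimal illustration (the config fragment is made up):

```
# Hypothetical fragment of a Hugging Face ViT config.json.
config = {
    "architectures": ["ViTForImageClassification"],
    "id2label": {"0": "cat", "1": "dog", "2": "bird"},
}

if config["architectures"][0] == "ViTForImageClassification":
    num_classes = len(config["id2label"])  # -> 3, injected as kwargs["num_classes"]
    print(num_classes)
```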
keras_hub/src/utils/transformers/safetensor_utils.py
CHANGED
@@ -42,12 +42,13 @@ class SafetensorLoader(contextlib.ExitStack):
         """
         Determine and return a prefixed key for a given hf weight key.

-        This method checks if there's a common prefix for the weight keys and
-        for future use.
+        This method checks if there's a common prefix for the weight keys and
+        caches it for future use.

         Args:
             hf_weight_key (str): The hf weight key to check for a prefix.
-            dict_like (object): An object to get keys of safetensor file using keys() method.
+            dict_like (object): An object to get keys of safetensor file using
+                keys() method.

         Returns:
             str: The full key including the prefix (if any).
|