keras-hub 0.20.0.dev1__py3-none-any.whl → 0.21.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. keras_hub/__init__.py +15 -33
  2. keras_hub/layers/__init__.py +134 -0
  3. keras_hub/metrics/__init__.py +11 -0
  4. keras_hub/models/__init__.py +642 -0
  5. keras_hub/samplers/__init__.py +18 -0
  6. keras_hub/src/layers/modeling/reversible_embedding.py +25 -35
  7. keras_hub/src/layers/preprocessing/image_converter.py +1 -0
  8. keras_hub/src/layers/preprocessing/random_deletion.py +1 -1
  9. keras_hub/src/layers/preprocessing/random_swap.py +1 -1
  10. keras_hub/src/models/audio_to_text.py +66 -0
  11. keras_hub/src/models/audio_to_text_preprocessor.py +80 -0
  12. keras_hub/src/models/backbone.py +5 -2
  13. keras_hub/src/models/cspnet/cspnet_backbone.py +51 -26
  14. keras_hub/src/models/cspnet/cspnet_presets.py +38 -3
  15. keras_hub/src/models/falcon/falcon_backbone.py +1 -1
  16. keras_hub/src/models/gemma/gemma_presets.py +10 -10
  17. keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py +3 -2
  18. keras_hub/src/models/gemma3/gemma3_presets.py +8 -8
  19. keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
  20. keras_hub/src/models/llama/llama_attention.py +24 -6
  21. keras_hub/src/models/llama/llama_backbone.py +50 -16
  22. keras_hub/src/models/llama/llama_decoder.py +20 -3
  23. keras_hub/src/models/llama/llama_presets.py +3 -3
  24. keras_hub/src/models/llama/llama_rotary_embedding.py +180 -0
  25. keras_hub/src/models/llama3/llama3_backbone.py +10 -2
  26. keras_hub/src/models/llama3/llama3_presets.py +84 -2
  27. keras_hub/src/models/mistral/mistral_presets.py +3 -3
  28. keras_hub/src/models/mixtral/__init__.py +5 -0
  29. keras_hub/src/models/mixtral/mixtral_attention.py +252 -0
  30. keras_hub/src/models/mixtral/mixtral_backbone.py +207 -0
  31. keras_hub/src/models/mixtral/mixtral_causal_lm.py +281 -0
  32. keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py +76 -0
  33. keras_hub/src/models/mixtral/mixtral_decoder.py +494 -0
  34. keras_hub/src/models/mixtral/mixtral_layer_norm.py +34 -0
  35. keras_hub/src/models/mixtral/mixtral_presets.py +26 -0
  36. keras_hub/src/models/mixtral/mixtral_tokenizer.py +21 -0
  37. keras_hub/src/models/moonshine/__init__.py +5 -0
  38. keras_hub/src/models/moonshine/moonshine_audio_converter.py +301 -0
  39. keras_hub/src/models/moonshine/moonshine_audio_to_text.py +383 -0
  40. keras_hub/src/models/moonshine/moonshine_audio_to_text_preprocessor.py +272 -0
  41. keras_hub/src/models/moonshine/moonshine_backbone.py +478 -0
  42. keras_hub/src/models/moonshine/moonshine_decoder.py +313 -0
  43. keras_hub/src/models/moonshine/moonshine_encoder.py +212 -0
  44. keras_hub/src/models/moonshine/moonshine_layers.py +239 -0
  45. keras_hub/src/models/moonshine/moonshine_multi_head_attention.py +355 -0
  46. keras_hub/src/models/moonshine/moonshine_presets.py +25 -0
  47. keras_hub/src/models/moonshine/moonshine_tokenizer.py +62 -0
  48. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +11 -11
  49. keras_hub/src/models/pali_gemma/pali_gemma_vit.py +1 -1
  50. keras_hub/src/models/qwen/__init__.py +4 -0
  51. keras_hub/src/models/qwen/qwen_attention.py +3 -1
  52. keras_hub/src/models/qwen/qwen_backbone.py +8 -1
  53. keras_hub/src/models/qwen/qwen_causal_lm.py +7 -0
  54. keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py +7 -0
  55. keras_hub/src/models/qwen/qwen_presets.py +61 -0
  56. keras_hub/src/models/qwen/qwen_tokenizer.py +9 -0
  57. keras_hub/src/models/qwen_moe/__init__.py +5 -0
  58. keras_hub/src/models/qwen_moe/qwen_moe_attention.py +375 -0
  59. keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +373 -0
  60. keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py +350 -0
  61. keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py +17 -0
  62. keras_hub/src/models/qwen_moe/qwen_moe_decoder.py +625 -0
  63. keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py +32 -0
  64. keras_hub/src/models/qwen_moe/qwen_moe_presets.py +15 -0
  65. keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py +46 -0
  66. keras_hub/src/models/retinanet/retinanet_image_converter.py +0 -13
  67. keras_hub/src/models/retinanet/retinanet_presets.py +2 -2
  68. keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +0 -18
  69. keras_hub/src/models/segformer/segformer_presets.py +12 -12
  70. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +6 -0
  71. keras_hub/src/models/task.py +5 -2
  72. keras_hub/src/models/xception/__init__.py +5 -0
  73. keras_hub/src/models/xception/xception_backbone.py +188 -0
  74. keras_hub/src/models/xception/xception_image_classifier.py +12 -0
  75. keras_hub/src/models/xception/xception_image_classifier_preprocessor.py +14 -0
  76. keras_hub/src/models/xception/xception_image_converter.py +8 -0
  77. keras_hub/src/models/xception/xception_presets.py +14 -0
  78. keras_hub/src/tests/mocks/mock_gemma3_tokenizer.py +155 -0
  79. keras_hub/src/utils/coco/__init__.py +0 -0
  80. keras_hub/src/utils/coco/coco_utils.py +133 -0
  81. keras_hub/src/utils/imagenet/imagenet_utils.py +36 -0
  82. keras_hub/src/utils/keras_utils.py +11 -0
  83. keras_hub/src/utils/preset_utils.py +70 -10
  84. keras_hub/src/utils/tensor_utils.py +27 -1
  85. keras_hub/src/utils/timm/convert_cspnet.py +94 -23
  86. keras_hub/src/utils/timm/preset_loader.py +6 -6
  87. keras_hub/src/utils/transformers/convert_llama3.py +21 -1
  88. keras_hub/src/utils/transformers/convert_mixtral.py +139 -0
  89. keras_hub/src/utils/transformers/convert_qwen.py +1 -0
  90. keras_hub/src/utils/transformers/convert_qwen_moe.py +253 -0
  91. keras_hub/src/utils/transformers/preset_loader.py +6 -0
  92. keras_hub/src/{version_utils.py → version.py} +1 -1
  93. keras_hub/tokenizers/__init__.py +117 -0
  94. keras_hub/utils/__init__.py +21 -0
  95. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/METADATA +6 -20
  96. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/RECORD +98 -55
  97. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/WHEEL +1 -1
  98. keras_hub/api/__init__.py +0 -15
  99. keras_hub/api/layers/__init__.py +0 -86
  100. keras_hub/api/metrics/__init__.py +0 -11
  101. keras_hub/api/models/__init__.py +0 -416
  102. keras_hub/api/samplers/__init__.py +0 -16
  103. keras_hub/api/tokenizers/__init__.py +0 -58
  104. keras_hub/api/utils/__init__.py +0 -9
  105. {keras_hub-0.20.0.dev1.dist-info → keras_hub-0.21.0.dev1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ from keras_hub.src.api_export import keras_hub_export
2
+
3
+
4
+ @keras_hub_export("keras_hub.utils.coco_id_to_name")
5
+ def coco_id_to_name(id):
6
+ """Convert a single COCO class ID to a class name.
7
+
8
+ Args:
9
+ id: An integer class id from 0 to 91.
10
+
11
+ Returns:
12
+ The human readable image class name, e.g. "bicycle".
13
+
14
+ Example:
15
+ >>> keras_hub.utils.coco_id_to_name(2)
16
+ 'bicycle'
17
+ """
18
+ return COCO_NAMES[id]
19
+
20
+
21
+ @keras_hub_export("keras_hub.utils.coco_name_to_id")
22
+ def coco_name_to_id(name):
23
+ """Convert a single COCO class name to a class ID.
24
+
25
+ Args:
26
+ name: A human readable image class name, e.g. "bicycle".
27
+
28
+ Returns:
29
+ The integer class id from 0 to 91.
30
+
31
+ Example:
32
+ >>> keras_hub.utils.coco_name_to_id("bicycle")
33
+ 2
34
+ """
35
+ return COCO_IDS[name]
36
+
37
+
38
+ COCO_NAMES = {
39
+ 0: "unlabeled",
40
+ 1: "person",
41
+ 2: "bicycle",
42
+ 3: "car",
43
+ 4: "motorcycle",
44
+ 5: "airplane",
45
+ 6: "bus",
46
+ 7: "train",
47
+ 8: "truck",
48
+ 9: "boat",
49
+ 10: "traffic_light",
50
+ 11: "fire_hydrant",
51
+ 12: "street_sign",
52
+ 13: "stop_sign",
53
+ 14: "parking_meter",
54
+ 15: "bench",
55
+ 16: "bird",
56
+ 17: "cat",
57
+ 18: "dog",
58
+ 19: "horse",
59
+ 20: "sheep",
60
+ 21: "cow",
61
+ 22: "elephant",
62
+ 23: "bear",
63
+ 24: "zebra",
64
+ 25: "giraffe",
65
+ 26: "hat",
66
+ 27: "backpack",
67
+ 28: "umbrella",
68
+ 29: "shoe",
69
+ 30: "eye_glasses",
70
+ 31: "handbag",
71
+ 32: "tie",
72
+ 33: "suitcase",
73
+ 34: "frisbee",
74
+ 35: "skis",
75
+ 36: "snowboard",
76
+ 37: "sports_ball",
77
+ 38: "kite",
78
+ 39: "baseball_bat",
79
+ 40: "baseball_glove",
80
+ 41: "skateboard",
81
+ 42: "surfboard",
82
+ 43: "tennis_racket",
83
+ 44: "bottle",
84
+ 45: "plate",
85
+ 46: "wine_glass",
86
+ 47: "cup",
87
+ 48: "fork",
88
+ 49: "knife",
89
+ 50: "spoon",
90
+ 51: "bowl",
91
+ 52: "banana",
92
+ 53: "apple",
93
+ 54: "sandwich",
94
+ 55: "orange",
95
+ 56: "broccoli",
96
+ 57: "carrot",
97
+ 58: "hot_dog",
98
+ 59: "pizza",
99
+ 60: "donut",
100
+ 61: "cake",
101
+ 62: "chair",
102
+ 63: "couch",
103
+ 64: "potted_plant",
104
+ 65: "bed",
105
+ 66: "mirror",
106
+ 67: "dining_table",
107
+ 68: "window",
108
+ 69: "desk",
109
+ 70: "toilet",
110
+ 71: "door",
111
+ 72: "tv",
112
+ 73: "laptop",
113
+ 74: "mouse",
114
+ 75: "remote",
115
+ 76: "keyboard",
116
+ 77: "cell_phone",
117
+ 78: "microwave",
118
+ 79: "oven",
119
+ 80: "toaster",
120
+ 81: "sink",
121
+ 82: "refrigerator",
122
+ 83: "blender",
123
+ 84: "book",
124
+ 85: "clock",
125
+ 86: "vase",
126
+ 87: "scissors",
127
+ 88: "teddy_bear",
128
+ 89: "hair_drier",
129
+ 90: "toothbrush",
130
+ 91: "hair_brush",
131
+ }
132
+
133
+ COCO_IDS = {v: k for k, v in COCO_NAMES.items()}
@@ -3,6 +3,40 @@ from keras import ops
3
3
  from keras_hub.src.api_export import keras_hub_export
4
4
 
5
5
 
6
+ @keras_hub_export("keras_hub.utils.imagenet_id_to_name")
7
+ def imagenet_id_to_name(id):
8
+ """Convert a single ImageNet class ID to a class name.
9
+
10
+ Args:
11
+ id: An integer class id from 0 to 999.
12
+
13
+ Returns:
14
+ The human readable image class name, e.g. "goldfish".
15
+
16
+ Example:
17
+ >>> keras_hub.utils.imagenet_id_to_name(1)
18
+ 'goldfish'
19
+ """
20
+ return IMAGENET_NAMES[id][1]
21
+
22
+
23
+ @keras_hub_export("keras_hub.utils.imagenet_name_to_id")
24
+ def imagenet_name_to_id(name):
25
+ """Convert a single ImageNet class name to a class ID.
26
+
27
+ Args:
28
+ name: A human readable image class name, e.g. "goldfish".
29
+
30
+ Returns:
31
+ The integer class id from 0 to 999.
32
+
33
+ Example:
34
+ >>> keras_hub.utils.imagenet_name_to_id("goldfish")
35
+ 1
36
+ """
37
+ return IMAGENET_IDS[name]
38
+
39
+
6
40
  @keras_hub_export("keras_hub.utils.decode_imagenet_predictions")
7
41
  def decode_imagenet_predictions(preds, top=5, include_synset_ids=False):
8
42
  """Decodes the predictions for an ImageNet-1k prediction.
@@ -1052,3 +1086,5 @@ IMAGENET_NAMES = {
1052
1086
  998: ("n13133613", "ear"),
1053
1087
  999: ("n15075141", "toilet_tissue"),
1054
1088
  }
1089
+
1090
+ IMAGENET_IDS = {v[1]: k for k, v in IMAGENET_NAMES.items()}
@@ -1,3 +1,4 @@
1
+ import inspect
1
2
  import sys
2
3
 
3
4
  import keras
@@ -147,3 +148,13 @@ def get_gpu_names():
147
148
  ]
148
149
  else:
149
150
  return [""]
151
+
152
+
153
+ def sharded_weights_available():
154
+ """Whether sharded weights serialization is available.
155
+
156
+ Returns:
157
+ `True` if sharded weights are available, `False` otherwise.
158
+ """
159
+ save_weights_signature = inspect.signature(keras.saving.save_weights)
160
+ return "max_shard_size" in save_weights_signature.parameters
@@ -10,6 +10,8 @@ from absl import logging
10
10
 
11
11
  from keras_hub.src.api_export import keras_hub_export
12
12
  from keras_hub.src.utils.keras_utils import print_msg
13
+ from keras_hub.src.utils.keras_utils import sharded_weights_available
14
+ from keras_hub.src.utils.tensor_utils import get_tensor_size_in_bits
13
15
 
14
16
  try:
15
17
  import kagglehub
@@ -48,6 +50,7 @@ METADATA_FILE = "metadata.json"
48
50
  # Weight file names.
49
51
  MODEL_WEIGHTS_FILE = "model.weights.h5"
50
52
  TASK_WEIGHTS_FILE = "task.weights.h5"
53
+ SHARDED_MODEL_WEIGHTS_CONFIG_FILE = "model.weights.json"
51
54
 
52
55
  # HuggingFace filenames.
53
56
  README_FILE = "README.md"
@@ -647,7 +650,7 @@ class KerasPresetLoader(PresetLoader):
647
650
  backbone = self._load_serialized_object(self.config, **kwargs)
648
651
  if load_weights:
649
652
  jax_memory_cleanup(backbone)
650
- backbone.load_weights(get_file(self.preset, MODEL_WEIGHTS_FILE))
653
+ self._load_backbone_weights(backbone)
651
654
  return backbone
652
655
 
653
656
  def load_tokenizer(self, cls, config_file=TOKENIZER_CONFIG_FILE, **kwargs):
@@ -697,8 +700,7 @@ class KerasPresetLoader(PresetLoader):
697
700
  task.load_task_weights(task_weights)
698
701
  else:
699
702
  jax_memory_cleanup(task.backbone)
700
- backbone_weights = get_file(self.preset, MODEL_WEIGHTS_FILE)
701
- task.backbone.load_weights(backbone_weights)
703
+ self._load_backbone_weights(task.backbone)
702
704
  return task
703
705
 
704
706
  def load_preprocessor(
@@ -726,18 +728,64 @@ class KerasPresetLoader(PresetLoader):
726
728
  config["config"] = {**config["config"], **kwargs}
727
729
  return keras.saving.deserialize_keras_object(config)
728
730
 
731
+ def _get_sharded_filenames(self, config_path):
732
+ with open(config_path, encoding="utf-8") as config_file:
733
+ config = json.load(config_file)
734
+ weight_map = config["weight_map"]
735
+ return sorted(set(weight_map.values()))
736
+
737
+ def _load_backbone_weights(self, backbone):
738
+ # Detect if the backbone is sharded or not.
739
+ has_single_file_weights = check_file_exists(
740
+ self.preset, MODEL_WEIGHTS_FILE
741
+ )
742
+ if has_single_file_weights:
743
+ filepath = get_file(self.preset, MODEL_WEIGHTS_FILE)
744
+ else:
745
+ if not sharded_weights_available():
746
+ raise RuntimeError(
747
+ "Sharded weights loading is not supported in the current "
748
+ f"Keras version {keras.__version__}. "
749
+ "Please update to a newer version."
750
+ )
751
+ filepath = get_file(self.preset, SHARDED_MODEL_WEIGHTS_CONFIG_FILE)
752
+ sharded_filenames = self._get_sharded_filenames(filepath)
753
+ for sharded_filename in sharded_filenames:
754
+ # Download the sharded weights.
755
+ _ = get_file(self.preset, sharded_filename)
756
+ backbone.load_weights(filepath)
757
+
729
758
 
730
759
  class KerasPresetSaver:
731
760
  def __init__(self, preset_dir):
732
761
  os.makedirs(preset_dir, exist_ok=True)
733
762
  self.preset_dir = preset_dir
734
763
 
735
- def save_backbone(self, backbone):
764
+ def save_backbone(self, backbone, max_shard_size=10):
736
765
  self._save_serialized_object(backbone, config_file=CONFIG_FILE)
737
- backbone_weight_path = os.path.join(self.preset_dir, MODEL_WEIGHTS_FILE)
738
- backbone.save_weights(backbone_weight_path)
739
766
  self._save_metadata(backbone)
740
767
 
768
+ # Save the weights.
769
+ backbone_size_in_bytes = self._get_variables_size_in_bytes(
770
+ backbone.variables
771
+ )
772
+ backbone_size_in_gb = backbone_size_in_bytes / (1024**3)
773
+ # If the size of the backbone is larger than `max_shard_size`, save
774
+ # sharded weights.
775
+ if sharded_weights_available() and backbone_size_in_gb > max_shard_size:
776
+ backbone_sharded_weights_config_path = os.path.join(
777
+ self.preset_dir, SHARDED_MODEL_WEIGHTS_CONFIG_FILE
778
+ )
779
+ backbone.save_weights(
780
+ backbone_sharded_weights_config_path,
781
+ max_shard_size=max_shard_size,
782
+ )
783
+ else:
784
+ backbone_weight_path = os.path.join(
785
+ self.preset_dir, MODEL_WEIGHTS_FILE
786
+ )
787
+ backbone.save_weights(backbone_weight_path)
788
+
741
789
  def save_tokenizer(self, tokenizer):
742
790
  config_file = TOKENIZER_CONFIG_FILE
743
791
  if hasattr(tokenizer, "config_file"):
@@ -755,7 +803,7 @@ class KerasPresetSaver:
755
803
  def save_image_converter(self, converter):
756
804
  self._save_serialized_object(converter, IMAGE_CONVERTER_CONFIG_FILE)
757
805
 
758
- def save_task(self, task):
806
+ def save_task(self, task, max_shard_size=10):
759
807
  # Save task specific config and weights.
760
808
  self._save_serialized_object(task, TASK_CONFIG_FILE)
761
809
  if task.has_task_weights():
@@ -763,10 +811,12 @@ class KerasPresetSaver:
763
811
  task.save_task_weights(task_weight_path)
764
812
  # Save backbone.
765
813
  if hasattr(task.backbone, "save_to_preset"):
766
- task.backbone.save_to_preset(self.preset_dir)
814
+ task.backbone.save_to_preset(
815
+ self.preset_dir, max_shard_size=max_shard_size
816
+ )
767
817
  else:
768
818
  # Allow saving a `keras.Model` that is not a backbone subclass.
769
- self.save_backbone(task.backbone)
819
+ self.save_backbone(task.backbone, max_shard_size=max_shard_size)
770
820
  # Save preprocessor.
771
821
  if task.preprocessor and hasattr(task.preprocessor, "save_to_preset"):
772
822
  task.preprocessor.save_to_preset(self.preset_dir)
@@ -801,7 +851,7 @@ class KerasPresetSaver:
801
851
 
802
852
  def _save_metadata(self, layer):
803
853
  from keras_hub.src.models.task import Task
804
- from keras_hub.src.version_utils import __version__ as keras_hub_version
854
+ from keras_hub.src.version import __version__ as keras_hub_version
805
855
 
806
856
  # Find all tasks that are compatible with the backbone.
807
857
  # E.g. for `BertBackbone` we would have `TextClassifier` and `MaskedLM`.
@@ -823,3 +873,13 @@ class KerasPresetSaver:
823
873
  metadata_path = os.path.join(self.preset_dir, METADATA_FILE)
824
874
  with open(metadata_path, "w") as metadata_file:
825
875
  metadata_file.write(json.dumps(metadata, indent=4))
876
+
877
+ def _get_variables_size_in_bytes(self, variables):
878
+ unique_variables = {}
879
+ for v in variables:
880
+ if id(v) not in unique_variables:
881
+ unique_variables[id(v)] = (v.shape, v.dtype)
882
+ total_memory_size = 0
883
+ for shape, dtype in unique_variables.values():
884
+ total_memory_size += get_tensor_size_in_bits(shape, dtype)
885
+ return total_memory_size / 8
@@ -1,6 +1,8 @@
1
1
  import contextlib
2
2
  import functools
3
3
  import inspect
4
+ import math
5
+ import re
4
6
  import threading
5
7
 
6
8
  import keras
@@ -305,6 +307,29 @@ def is_string_dtype(dtype):
305
307
  return "string" in keras.backend.standardize_dtype(dtype)
306
308
 
307
309
 
310
+ def get_dtype_size_in_bits(dtype):
311
+ """Get the size of a given dtype in bits."""
312
+ dtype = keras.backend.standardize_dtype(dtype)
313
+ # If dtype is bool, return 1 immediately.
314
+ if dtype == "bool":
315
+ return 1
316
+ # Else, we extract the bit size from the string.
317
+ return int(re.sub(r"bfloat|float|uint|int", "", dtype))
318
+
319
+
320
+ def get_tensor_size_in_bits(shape, dtype):
321
+ """Calculate the tensor size in bits given its shape and dtype.
322
+
323
+ Args:
324
+ dtype: The dtype of the tensor.
325
+ shape: An iterable of integers representing the shape of the tensor.
326
+
327
+ Returns:
328
+ The size of the tensor in bits.
329
+ """
330
+ return math.prod(shape) * get_dtype_size_in_bits(dtype)
331
+
332
+
308
333
  def any_equal(inputs, values, padding_mask):
309
334
  """Return a mask that is True anywhere `inputs` has a value in `values`.
310
335
 
@@ -320,7 +345,8 @@ def any_equal(inputs, values, padding_mask):
320
345
  Returns:
321
346
  A tensor with `inputs` shape where each position is True if it contains
322
347
  a value from any `values`. Padding mask will be applied before
323
- returning."""
348
+ returning.
349
+ """
324
350
  output = ops.equal(inputs, values[0])
325
351
  for value in values[1:]:
326
352
  value_equality = ops.equal(inputs, value)
@@ -17,10 +17,69 @@ def convert_backbone_config(timm_config):
17
17
  bottle_ratio = (0.5,) + (1.0,)
18
18
  block_ratio = (1.0,) + (0.5,)
19
19
  expand_ratio = (2.0,) + (1.0,)
20
+ stem_padding = "same"
21
+ stem_pooling = None
20
22
  stage_type = "csp"
23
+ groups = 1
21
24
  block_type = "dark_block"
22
25
  down_growth = True
23
- stackwise_strides = 2
26
+ stackwise_strides = [2, 2, 2, 2, 2]
27
+ avg_down = False
28
+ cross_linear = False
29
+ elif timm_architecture == "cspresnet50":
30
+ stem_filters = 64
31
+ stem_kernel_size = 7
32
+ stem_strides = 4
33
+ stackwise_depth = [3, 3, 5, 2]
34
+ stackwise_strides = [1, 2, 2, 2]
35
+ stackwise_num_filters = [128, 256, 512, 1024]
36
+ block_type = "bottleneck_block"
37
+ stage_type = "csp"
38
+ bottle_ratio = [0.5]
39
+ block_ratio = [1.0]
40
+ expand_ratio = [2.0]
41
+ stem_padding = "valid"
42
+ stem_pooling = "max"
43
+ avg_down = False
44
+ groups = 1
45
+ down_growth = False
46
+ cross_linear = True
47
+ elif timm_architecture == "cspresnext50":
48
+ stem_filters = 64
49
+ stem_kernel_size = 7
50
+ stem_strides = 4
51
+ stackwise_depth = [3, 3, 5, 2]
52
+ stackwise_num_filters = [256, 512, 1024, 2048]
53
+ bottle_ratio = [1.0]
54
+ block_ratio = [0.5]
55
+ expand_ratio = [1.0]
56
+ stage_type = "csp"
57
+ block_type = "bottleneck_block"
58
+ stem_pooling = "max"
59
+ stackwise_strides = [1, 2, 2, 2]
60
+ groups = 32
61
+ stem_padding = "valid"
62
+ avg_down = False
63
+ down_growth = False
64
+ cross_linear = True
65
+ elif timm_architecture == "darknet53":
66
+ stem_filters = 32
67
+ stem_kernel_size = 3
68
+ stem_strides = 1
69
+ stackwise_depth = [1, 2, 8, 8, 4]
70
+ stackwise_num_filters = [64, 128, 256, 512, 1024]
71
+ bottle_ratio = [0.5]
72
+ block_ratio = [1.0]
73
+ groups = 1
74
+ expand_ratio = [1.0]
75
+ stage_type = "dark"
76
+ block_type = "dark_block"
77
+ stem_pooling = None
78
+ stackwise_strides = [2, 2, 2, 2, 2]
79
+ stem_padding = "same"
80
+ avg_down = False
81
+ down_growth = False
82
+ cross_linear = False
24
83
  else:
25
84
  raise ValueError(
26
85
  f"Currently, the architecture {timm_architecture} is not supported."
@@ -38,6 +97,11 @@ def convert_backbone_config(timm_config):
38
97
  block_type=block_type,
39
98
  stackwise_strides=stackwise_strides,
40
99
  down_growth=down_growth,
100
+ stem_pooling=stem_pooling,
101
+ stem_padding=stem_padding,
102
+ avg_down=avg_down,
103
+ cross_linear=cross_linear,
104
+ groups=groups,
41
105
  )
42
106
 
43
107
 
@@ -81,21 +145,36 @@ def convert_weights(backbone, loader, timm_config):
81
145
  stackwise_depth = backbone.stackwise_depth
82
146
  stage_type = backbone.stage_type
83
147
  block_type = backbone.block_type
148
+ strides = backbone.stackwise_strides
84
149
 
85
150
  for idx, block in enumerate(stackwise_depth):
86
- port_conv2d(
87
- f"stages.{idx}.conv_down.conv",
88
- f"stage_{idx}_{stage_type}_conv_down_1",
89
- )
90
- port_batch_normalization(
91
- f"stages.{idx}.conv_down.bn", f"stage_{idx}_{stage_type}_bn_1"
92
- )
93
- port_conv2d(
94
- f"stages.{idx}.conv_exp.conv", f"stage_{idx}_{stage_type}_conv_exp"
95
- )
96
- port_batch_normalization(
97
- f"stages.{idx}.conv_exp.bn", f"stage_{idx}_{stage_type}_bn_2"
98
- )
151
+ if strides[idx] != 1 or stage_type == "dark":
152
+ if strides[idx] == 2 and backbone.avg_down:
153
+ port_conv2d(
154
+ f"stages.{idx}.conv_down.1.conv",
155
+ f"stage_{idx}_{stage_type}_conv_down_1",
156
+ )
157
+ port_batch_normalization(
158
+ f"stages.{idx}.conv_down.1.bn",
159
+ f"stage_{idx}_{stage_type}_bn_1",
160
+ )
161
+ else:
162
+ port_conv2d(
163
+ f"stages.{idx}.conv_down.conv",
164
+ f"stage_{idx}_{stage_type}_conv_down_1",
165
+ )
166
+ port_batch_normalization(
167
+ f"stages.{idx}.conv_down.bn",
168
+ f"stage_{idx}_{stage_type}_bn_1",
169
+ )
170
+ if stage_type != "dark":
171
+ port_conv2d(
172
+ f"stages.{idx}.conv_exp.conv",
173
+ f"stage_{idx}_{stage_type}_conv_exp",
174
+ )
175
+ port_batch_normalization(
176
+ f"stages.{idx}.conv_exp.bn", f"stage_{idx}_{stage_type}_bn_2"
177
+ )
99
178
 
100
179
  for i in range(block):
101
180
  port_conv2d(
@@ -133,16 +212,8 @@ def convert_weights(backbone, loader, timm_config):
133
212
  f"stages.{idx}.conv_transition_b.bn",
134
213
  f"stage_{idx}_{stage_type}_transition_b_bn",
135
214
  )
136
- port_conv2d(
137
- f"stages.{idx}.conv_transition.conv",
138
- f"stage_{idx}_{stage_type}_conv_transition",
139
- )
140
- port_batch_normalization(
141
- f"stages.{idx}.conv_transition.bn",
142
- f"stage_{idx}_{stage_type}_transition_bn",
143
- )
144
215
 
145
- else:
216
+ if stage_type != "dark":
146
217
  port_conv2d(
147
218
  f"stages.{idx}.conv_transition.conv",
148
219
  f"stage_{idx}_{stage_type}_conv_transition",
@@ -16,17 +16,17 @@ class TimmPresetLoader(PresetLoader):
16
16
  def __init__(self, preset, config):
17
17
  super().__init__(preset, config)
18
18
  architecture = self.config["architecture"]
19
- if "resnet" in architecture:
19
+ if architecture.startswith("resnet"):
20
20
  self.converter = convert_resnet
21
- elif "csp" in architecture:
21
+ elif architecture.startswith(("csp", "dark")):
22
22
  self.converter = convert_cspnet
23
- elif "densenet" in architecture:
23
+ elif architecture.startswith("densenet"):
24
24
  self.converter = convert_densenet
25
- elif "mobilenet" in architecture:
25
+ elif architecture.startswith("mobilenet"):
26
26
  self.converter = convert_mobilenet
27
- elif "vgg" in architecture:
27
+ elif architecture.startswith("vgg"):
28
28
  self.converter = convert_vgg
29
- elif "efficientnet" in architecture:
29
+ elif architecture.startswith("efficientnet"):
30
30
  self.converter = convert_efficientnet
31
31
  else:
32
32
  raise ValueError(
@@ -7,7 +7,7 @@ backbone_cls = Llama3Backbone
7
7
 
8
8
 
9
9
  def convert_backbone_config(transformers_config):
10
- return {
10
+ backbone_config = {
11
11
  "vocabulary_size": transformers_config["vocab_size"],
12
12
  "num_layers": transformers_config["num_hidden_layers"],
13
13
  "num_query_heads": transformers_config["num_attention_heads"],
@@ -15,8 +15,28 @@ def convert_backbone_config(transformers_config):
15
15
  "intermediate_dim": transformers_config["intermediate_size"],
16
16
  "num_key_value_heads": transformers_config["num_key_value_heads"],
17
17
  "tie_word_embeddings": transformers_config["tie_word_embeddings"],
18
+ "rope_max_wavelength": transformers_config["rope_theta"],
18
19
  }
19
20
 
21
+ if transformers_config.get("rope_scaling", None) is not None:
22
+ if transformers_config["rope_scaling"]["rope_type"] != "llama3":
23
+ raise ValueError("The config should be a valid llama3 config.")
24
+ backbone_config["rope_frequency_adjustment_factor"] = (
25
+ transformers_config["rope_scaling"]["factor"]
26
+ )
27
+ backbone_config["rope_low_freq_factor"] = transformers_config[
28
+ "rope_scaling"
29
+ ]["low_freq_factor"]
30
+ backbone_config["rope_high_freq_factor"] = transformers_config[
31
+ "rope_scaling"
32
+ ]["high_freq_factor"]
33
+ backbone_config["rope_pretraining_sequence_length"] = (
34
+ transformers_config["rope_scaling"][
35
+ "original_max_position_embeddings"
36
+ ]
37
+ )
38
+ return backbone_config
39
+
20
40
 
21
41
  def convert_weights(backbone, loader, transformers_config):
22
42
  loader.port_weight(