keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- keras_hub/__init__.py +0 -6
- keras_hub/api/__init__.py +2 -0
- keras_hub/api/bounding_box/__init__.py +36 -0
- keras_hub/api/layers/__init__.py +14 -0
- keras_hub/api/models/__init__.py +97 -48
- keras_hub/api/tokenizers/__init__.py +30 -0
- keras_hub/api/utils/__init__.py +22 -0
- keras_hub/src/api_export.py +15 -9
- keras_hub/src/bounding_box/__init__.py +13 -0
- keras_hub/src/bounding_box/converters.py +529 -0
- keras_hub/src/bounding_box/formats.py +162 -0
- keras_hub/src/bounding_box/iou.py +263 -0
- keras_hub/src/bounding_box/to_dense.py +95 -0
- keras_hub/src/bounding_box/to_ragged.py +99 -0
- keras_hub/src/bounding_box/utils.py +194 -0
- keras_hub/src/bounding_box/validate_format.py +99 -0
- keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
- keras_hub/src/layers/preprocessing/image_converter.py +130 -0
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
- keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
- keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
- keras_hub/src/layers/preprocessing/random_swap.py +33 -31
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
- keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
- keras_hub/src/models/albert/__init__.py +1 -2
- keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
- keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
- keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/albert/albert_tokenizer.py +17 -36
- keras_hub/src/models/backbone.py +12 -34
- keras_hub/src/models/bart/__init__.py +1 -2
- keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
- keras_hub/src/models/bart/bart_tokenizer.py +12 -39
- keras_hub/src/models/bert/__init__.py +1 -5
- keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
- keras_hub/src/models/bert/bert_presets.py +1 -4
- keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
- keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/bert/bert_tokenizer.py +17 -35
- keras_hub/src/models/bloom/__init__.py +1 -2
- keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
- keras_hub/src/models/causal_lm.py +10 -29
- keras_hub/src/models/causal_lm_preprocessor.py +195 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
- keras_hub/src/models/deberta_v3/__init__.py +1 -4
- keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
- keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
- keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
- keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
- keras_hub/src/models/densenet/densenet_backbone.py +46 -22
- keras_hub/src/models/distil_bert/__init__.py +1 -4
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
- keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
- keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
- keras_hub/src/models/efficientnet/__init__.py +13 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
- keras_hub/src/models/efficientnet/mbconv.py +238 -0
- keras_hub/src/models/electra/__init__.py +1 -2
- keras_hub/src/models/electra/electra_tokenizer.py +17 -32
- keras_hub/src/models/f_net/__init__.py +1 -2
- keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
- keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
- keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
- keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
- keras_hub/src/models/falcon/__init__.py +1 -2
- keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
- keras_hub/src/models/gemma/__init__.py +1 -2
- keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
- keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
- keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
- keras_hub/src/models/gpt2/__init__.py +1 -2
- keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
- keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
- keras_hub/src/models/image_classifier.py +0 -5
- keras_hub/src/models/image_classifier_preprocessor.py +83 -0
- keras_hub/src/models/llama/__init__.py +1 -2
- keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
- keras_hub/src/models/llama/llama_tokenizer.py +12 -25
- keras_hub/src/models/llama3/__init__.py +1 -2
- keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
- keras_hub/src/models/masked_lm.py +0 -2
- keras_hub/src/models/masked_lm_preprocessor.py +156 -0
- keras_hub/src/models/mistral/__init__.py +1 -2
- keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
- keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
- keras_hub/src/models/mobilenet/__init__.py +13 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
- keras_hub/src/models/opt/__init__.py +1 -2
- keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
- keras_hub/src/models/opt/opt_tokenizer.py +12 -41
- keras_hub/src/models/pali_gemma/__init__.py +1 -4
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
- keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
- keras_hub/src/models/phi3/__init__.py +1 -2
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
- keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
- keras_hub/src/models/preprocessor.py +72 -83
- keras_hub/src/models/resnet/__init__.py +6 -0
- keras_hub/src/models/resnet/resnet_backbone.py +390 -42
- keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
- keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
- keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
- keras_hub/src/models/resnet/resnet_presets.py +95 -0
- keras_hub/src/models/retinanet/__init__.py +13 -0
- keras_hub/src/models/retinanet/anchor_generator.py +175 -0
- keras_hub/src/models/retinanet/box_matcher.py +259 -0
- keras_hub/src/models/retinanet/non_max_supression.py +578 -0
- keras_hub/src/models/roberta/__init__.py +1 -2
- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
- keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
- keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
- keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
- keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
- keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
- keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
- keras_hub/src/models/t5/__init__.py +1 -2
- keras_hub/src/models/t5/t5_tokenizer.py +13 -23
- keras_hub/src/models/task.py +71 -116
- keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
- keras_hub/src/models/text_classifier_preprocessor.py +138 -0
- keras_hub/src/models/whisper/__init__.py +1 -2
- keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
- keras_hub/src/models/whisper/whisper_backbone.py +0 -3
- keras_hub/src/models/whisper/whisper_presets.py +10 -10
- keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
- keras_hub/src/models/xlm_roberta/__init__.py +1 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
- keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
- keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
- keras_hub/src/tests/test_case.py +46 -0
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
- keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
- keras_hub/src/tokenizers/tokenizer.py +67 -32
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
- keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
- keras_hub/src/utils/imagenet/__init__.py +13 -0
- keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
- keras_hub/src/utils/keras_utils.py +0 -50
- keras_hub/src/utils/preset_utils.py +230 -68
- keras_hub/src/utils/tensor_utils.py +187 -69
- keras_hub/src/utils/timm/convert_resnet.py +19 -16
- keras_hub/src/utils/timm/preset_loader.py +66 -0
- keras_hub/src/utils/transformers/convert_albert.py +193 -0
- keras_hub/src/utils/transformers/convert_bart.py +373 -0
- keras_hub/src/utils/transformers/convert_bert.py +7 -17
- keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
- keras_hub/src/utils/transformers/convert_gemma.py +5 -19
- keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
- keras_hub/src/utils/transformers/convert_llama3.py +7 -18
- keras_hub/src/utils/transformers/convert_mistral.py +129 -0
- keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
- keras_hub/src/utils/transformers/preset_loader.py +77 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
- keras_hub/src/version_utils.py +1 -1
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
- keras_hub/src/models/bart/bart_preprocessor.py +0 -276
- keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
- keras_hub/src/models/electra/electra_preprocessor.py +0 -154
- keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
- keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
- keras_hub/src/models/llama/llama_preprocessor.py +0 -189
- keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
- keras_hub/src/models/opt/opt_preprocessor.py +0 -188
- keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
- keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
- keras_hub/src/utils/timm/convert.py +0 -37
- keras_hub/src/utils/transformers/convert.py +0 -101
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
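The headline change in this release is an API-wide rename: the `*Classifier` task and preprocessor classes become `*TextClassifier` and `*TextClassifierPreprocessor` (see the file moves above), the per-model `*_preprocessor.py` modules are folded into shared base classes (`causal_lm_preprocessor.py`, `masked_lm_preprocessor.py`, `text_classifier_preprocessor.py`, `seq_2_seq_lm_preprocessor.py`), and new audio/image converter layers are introduced. A minimal before/after sketch of the rename, assuming the new classes are exported under `keras_hub.models` as the file moves suggest (the preset name is illustrative):

import keras_hub

# 0.15 nightlies (old name):
# classifier = keras_hub.models.BertClassifier.from_preset("bert_base_en", num_classes=2)

# 0.16 nightlies (renamed):
classifier = keras_hub.models.BertTextClassifier.from_preset(
    "bert_base_en",
    num_classes=2,
)
classifier.predict(["What an amazing movie!"])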
keras_hub/src/utils/keras_utils.py

@@ -18,8 +18,6 @@ import keras
 from absl import logging
 from packaging.version import parse

-from keras_hub.src.utils.tensor_utils import is_tensor_type
-
 try:
     import tensorflow as tf
 except ImportError:
@@ -39,54 +37,6 @@ def clone_initializer(initializer):
     return initializer.__class__.from_config(config)


-def convert_inputs_to_list_of_tensor_segments(x):
-    """Converts user inputs to a list of a tensor segments.
-
-    For models and layers which accept lists of string tensors to pack together,
-    this method converts user inputs to a uniform format in a way that can be
-    considered canonical for the library.
-
-    We handle the following:
-
-    - A single string will be converted to a tensor and wrapped in a list.
-    - A list of strings will be converted to a tensor and wrapped in a list.
-    - A single tensor will be wrapped in a list.
-    - A list of tensors will be passed through unaltered.
-
-    All other inputs will result in an error. This effectively means that users
-    who would like to pack multiple segments together should convert those
-    segments to tensors before calling the layer. This removes any ambiguity
-    in the input for those cases.
-    """
-    # Check the input type.
-    is_string = isinstance(x, (str, bytes))
-    is_tensor = is_tensor_type(x)
-    is_string_list = (
-        isinstance(x, (list, tuple)) and x and isinstance(x[0], (str, bytes))
-    )
-    is_tensor_list = isinstance(x, (list, tuple)) and x and is_tensor_type(x[0])
-
-    if is_string or is_string_list:
-        # Automatically convert raw strings or string lists to tensors.
-        # Wrap this input as a single (possibly batched) segment.
-        x = [tf.convert_to_tensor(x)]
-    elif is_tensor:
-        # Automatically wrap a single tensor as a single segment.
-        x = [x]
-    elif is_tensor_list:
-        # Pass lists of tensors though unaltered.
-        x = x
-    else:
-        # Error for all other input.
-        raise ValueError(
-            f"Unsupported input for `x`. `x` should be a string, a list of "
-            "strings, or a list of tensors. If passing multiple segments "
-            "which should packed together, please convert your inputs to a "
-            f"list of tensors. Received `x={x}`"
-        )
-    return x
-
-
 def print_msg(message, line_break=True):
     """Print the message to absl logging or stdout."""
     # Copied from core Keras.
keras_hub/src/utils/preset_utils.py

@@ -60,6 +60,8 @@ TOKENIZER_ASSET_DIR = "assets/tokenizer"
 # Config file names.
 CONFIG_FILE = "config.json"
 TOKENIZER_CONFIG_FILE = "tokenizer.json"
+AUDIO_CONVERTER_CONFIG_FILE = "audio_converter.json"
+IMAGE_CONVERTER_CONFIG_FILE = "image_converter.json"
 TASK_CONFIG_FILE = "task.json"
 PREPROCESSOR_CONFIG_FILE = "preprocessor.json"
 METADATA_FILE = "metadata.json"
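The two new constants add audio and image converter configs to the preset format. A hypothetical preset directory exercising every file name defined in this block (layout inferred from the constants, not normative):

my_preset/
├── config.json            # CONFIG_FILE (backbone)
├── tokenizer.json         # TOKENIZER_CONFIG_FILE
├── audio_converter.json   # new: AUDIO_CONVERTER_CONFIG_FILE
├── image_converter.json   # new: IMAGE_CONVERTER_CONFIG_FILE
├── task.json              # TASK_CONFIG_FILE
├── preprocessor.json      # PREPROCESSOR_CONFIG_FILE
├── metadata.json          # METADATA_FILE
└── assets/tokenizer/      # TOKENIZER_ASSET_DIR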
@@ -77,10 +79,10 @@ SAFETENSOR_FILE = "model.safetensors"

 # Global state for preset registry.
 BUILTIN_PRESETS = {}
-BUILTIN_PRESETS_FOR_CLASS = collections.defaultdict(dict)
+BUILTIN_PRESETS_FOR_BACKBONE = collections.defaultdict(dict)


-def register_presets(presets, classes):
+def register_presets(presets, backbone_cls):
     """Register built-in presets for a set of classes.

     Note that this is intended only for models and presets shipped in the
@@ -88,18 +90,26 @@ def register_presets(presets, classes):
     """
     for preset in presets:
         BUILTIN_PRESETS[preset] = presets[preset]
-        for cls in classes:
-            BUILTIN_PRESETS_FOR_CLASS[cls][preset] = presets[preset]
+        BUILTIN_PRESETS_FOR_BACKBONE[backbone_cls][preset] = presets[preset]


-def list_presets(cls):
+def builtin_presets(cls):
     """Find all registered built-in presets for a class."""
-    return dict(BUILTIN_PRESETS_FOR_CLASS[cls])
+    presets = {}
+    if cls in BUILTIN_PRESETS_FOR_BACKBONE:
+        presets.update(BUILTIN_PRESETS_FOR_BACKBONE[cls])
+    backbone_cls = getattr(cls, "backbone_cls", None)
+    if backbone_cls:
+        presets.update(builtin_presets(backbone_cls))
+    for subclass in list_subclasses(cls):
+        presets.update(builtin_presets(subclass))
+    return presets


 def list_subclasses(cls):
     """Find all registered subclasses of a class."""
-    custom_objects = keras.saving.get_custom_objects().values()
+    # Deduplicate the lists, since we have to register object twice for compat.
+    custom_objects = set(keras.saving.get_custom_objects().values())
     subclasses = []
     for x in custom_objects:
         if inspect.isclass(x) and x != cls and issubclass(x, cls):
@@ -107,6 +117,26 @@ def list_subclasses(cls):
     return subclasses


+def find_subclass(preset, cls, backbone_cls):
+    """Find a subclass that is compatible with backbone_cls."""
+    subclasses = list_subclasses(cls)
+    subclasses = filter(lambda x: x.backbone_cls == backbone_cls, subclasses)
+    subclasses = list(subclasses)
+    if not subclasses:
+        raise ValueError(
+            f"Unable to find a subclass of {cls.__name__} that is compatible "
+            f"with {backbone_cls.__name__} found in preset '{preset}'."
+        )
+    # If we find multiple subclasses, try to filter to direct subclasses of
+    # the class we are trying to instantiate.
+    if len(subclasses) > 1:
+        directs = list(filter(lambda x: cls in x.__bases__, subclasses))
+        if len(directs) > 1:
+            subclasses = directs
+    # Return the subclass that was registered first (prefer built-in classes).
+    return subclasses[0]
+
+
 def get_file(preset, path):
     """Download a preset file in necessary and return the local path."""
     # TODO: Add tests for FileNotFound exceptions.
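Taken together, the two hunks above rework preset resolution: built-in presets are now keyed by backbone class, and concrete task classes are resolved from a backbone. A runnable toy of the two lookups; all class names are stand-ins, and the real implementation walks Keras's custom-object registry via `list_subclasses` rather than `__subclasses__`:

import collections

BUILTIN_PRESETS_FOR_BACKBONE = collections.defaultdict(dict)


class BertBackbone:
    pass


class TextClassifier:
    backbone_cls = None


class BertTextClassifier(TextClassifier):
    backbone_cls = BertBackbone


# register_presets(...): presets are stored once, keyed by the backbone class.
BUILTIN_PRESETS_FOR_BACKBONE[BertBackbone]["bert_base_en"] = {"kaggle_handle": "..."}


def builtin_presets(cls):
    # Direct hits, plus anything registered for the task's backbone.
    presets = dict(BUILTIN_PRESETS_FOR_BACKBONE.get(cls, {}))
    backbone_cls = getattr(cls, "backbone_cls", None)
    if backbone_cls:
        presets.update(builtin_presets(backbone_cls))
    return presets


def find_subclass(cls, backbone_cls):
    # Pick the registered task subclass whose backbone_cls matches.
    matches = [s for s in cls.__subclasses__() if s.backbone_cls is backbone_cls]
    if not matches:
        raise ValueError(f"No {cls.__name__} is compatible with {backbone_cls.__name__}.")
    return matches[0]


assert "bert_base_en" in builtin_presets(BertTextClassifier)
assert find_subclass(TextClassifier, BertBackbone) is BertTextClassifier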
@@ -197,7 +227,7 @@ def get_file(preset, path):
         else:
             raise ValueError(message)
     elif os.path.exists(preset):
-        # Assume a local filepath.
+        # Assume a local filepath.pyth
         local_path = os.path.join(preset, path)
         if not os.path.exists(local_path):
             raise FileNotFoundError(
@@ -272,6 +302,7 @@ def recursive_pop(config, key):
         recursive_pop(value, key)


+# TODO: refactor saving routines into a PresetSaver class?
 def make_preset_dir(preset):
     os.makedirs(preset, exist_ok=True)

@@ -314,19 +345,9 @@ def save_metadata(layer, preset):
         metadata_file.write(json.dumps(metadata, indent=4))


-def _validate_tokenizer(preset, allow_incomplete=False):
+def _validate_tokenizer(preset):
     if not check_file_exists(preset, TOKENIZER_CONFIG_FILE):
-        if allow_incomplete:
-            logging.warning(
-                f"`{TOKENIZER_CONFIG_FILE}` is missing from the preset directory `{preset}`."
-            )
-            return
-        else:
-            raise FileNotFoundError(
-                f"`{TOKENIZER_CONFIG_FILE}` is missing from the preset directory `{preset}`. "
-                "To upload the model without a tokenizer, "
-                "set `allow_incomplete=True`."
-            )
+        return
     config_path = get_file(preset, TOKENIZER_CONFIG_FILE)
     try:
         with open(config_path, encoding="utf-8") as config_file:
@@ -377,7 +398,7 @@ def _validate_backbone(preset):
         )


-def
+def to_snake_case(name):
     name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
     return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()

@@ -386,7 +407,7 @@ def create_model_card(preset):
     model_card_path = os.path.join(preset, README_FILE)
     markdown_content = ""

-    config = load_config(preset, CONFIG_FILE)
+    config = load_json(preset, CONFIG_FILE)
     model_name = (
         config["class_name"].replace("Backbone", "")
         if config["class_name"].endswith("Backbone")
@@ -395,7 +416,7 @@ def create_model_card(preset):

     task_type = None
     if check_file_exists(preset, TASK_CONFIG_FILE):
-        task_config = load_config(preset, TASK_CONFIG_FILE)
+        task_config = load_json(preset, TASK_CONFIG_FILE)
         task_type = (
             task_config["class_name"].replace(model_name, "")
             if task_config["class_name"].startswith(model_name)
@@ -407,12 +428,12 @@ def create_model_card(preset):
     markdown_content += "library_name: keras-hub\n"
     if task_type == "CausalLM":
         markdown_content += "pipeline_tag: text-generation\n"
-    elif task_type == "Classifier":
+    elif task_type == "TextClassifier":
         markdown_content += "pipeline_tag: text-classification\n"
     markdown_content += "---\n"

     model_link = (
-        f"https://keras.io/api/keras_hub/models/{
+        f"https://keras.io/api/keras_hub/models/{to_snake_case(model_name)}"
     )
     markdown_content += (
         f"This is a [`{model_name}` model]({model_link}) "
@@ -454,7 +475,6 @@ def delete_model_card(preset):
 def upload_preset(
     uri,
     preset,
-    allow_incomplete=False,
 ):
     """Upload a preset directory to a model hub.

@@ -466,9 +486,6 @@ def upload_preset(
             `hf://[<HF_USERNAME>/]<MODEL>` will be uploaded to the Hugging
             Face Hub.
         preset: The path to the local model preset directory.
-        allow_incomplete: If True, allows the upload of presets without
-            a tokenizer configuration. Otherwise, a tokenizer
-            is required.
     """

     # Check if preset directory exists.
@@ -476,7 +493,7 @@ def upload_preset(
         raise FileNotFoundError(f"The preset directory {preset} doesn't exist.")

     _validate_backbone(preset)
-    _validate_tokenizer(preset, allow_incomplete)
+    _validate_tokenizer(preset)

     if uri.startswith(KAGGLE_PREFIX):
         if kagglehub is None:
@@ -533,42 +550,14 @@ def upload_preset(
         )


-def load_config(preset, config_file=CONFIG_FILE):
+def load_json(preset, config_file=CONFIG_FILE):
     config_path = get_file(preset, config_file)
     with open(config_path, encoding="utf-8") as config_file:
         config = json.load(config_file)
     return config


-def check_format(preset):
-    if check_file_exists(preset, SAFETENSOR_FILE) or check_file_exists(
-        preset, SAFETENSOR_CONFIG_FILE
-    ):
-        # Determine the format by parsing the config file.
-        config = load_config(preset, HF_CONFIG_FILE)
-        if "hf://timm" in preset or "architecture" in config:
-            return "timm"
-        return "transformers"
-
-    if not check_file_exists(preset, METADATA_FILE):
-        raise FileNotFoundError(
-            f"The preset directory `{preset}` doesn't have a file named `{METADATA_FILE}`, "
-            "or you do not have access to it. This file is required to load a Keras model "
-            "preset. Please verify that the model you are trying to load is a Keras model."
-        )
-    metadata = load_config(preset, METADATA_FILE)
-    if "keras_version" not in metadata:
-        raise ValueError(
-            f"`{METADATA_FILE}` in the preset directory `{preset}` doesn't have `keras_version`. "
-            "Please verify that the model you are trying to load is a Keras model."
-        )
-    return "keras"
-
-
-def load_serialized_object(preset, config_file=CONFIG_FILE, **kwargs):
-    kwargs = kwargs or {}
-    config = load_config(preset, config_file)
-
+def load_serialized_object(config, **kwargs):
     # `dtype` in config might be a serialized `DTypePolicy` or `DTypePolicyMap`.
     # Ensure that `dtype` is properly configured.
     dtype = kwargs.pop("dtype", None)
@@ -578,15 +567,18 @@ def load_serialized_object(preset, config_file=CONFIG_FILE, **kwargs):
     return keras.saving.deserialize_keras_object(config)


-def check_config_class(
-    preset,
-    config_file=CONFIG_FILE,
-):
+def check_config_class(config):
     """Validate a preset is being loaded on the correct class."""
-    config_path = get_file(preset, config_file)
-    with open(config_path, encoding="utf-8") as config_file:
-        config = json.load(config_file)
-    return keras.saving.get_registered_object(config["registered_name"])
+    registered_name = config["registered_name"]
+    cls = keras.saving.get_registered_object(registered_name)
+    if cls is None:
+        raise ValueError(
+            f"Attempting to load class {registered_name} with "
+            "`from_preset()`, but there is no class registered with Keras "
+            f"for {registered_name}. Make sure to register any custom "
+            "classes with `register_keras_serializable()`."
+        )
+    return cls


 def jax_memory_cleanup(layer):
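The new `check_config_class` resolves a class purely from the serialized config, without touching the filesystem. A small demonstration using Keras's standard registration machinery (the `demo` package name and `MyBackbone` class are arbitrary):

import keras


@keras.saving.register_keras_serializable(package="demo")
class MyBackbone(keras.layers.Layer):
    pass


config = {"registered_name": "demo>MyBackbone"}
cls = keras.saving.get_registered_object(config["registered_name"])
assert cls is MyBackbone  # an unregistered name would return None, triggering the error above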
@@ -619,3 +611,173 @@ def set_dtype_in_config(config, dtype=None):
     for k in policy_map_config["policy_map"].keys():
         policy_map_config["policy_map"][k]["config"]["source_name"] = dtype
     return config
+
+
+def get_preset_loader(preset):
+    if not check_file_exists(preset, CONFIG_FILE):
+        raise ValueError(
+            f"Preset {preset} has no {CONFIG_FILE}. Make sure the URI or "
+            "directory you are trying to load is a valid KerasHub preset and "
+            "and that you have permissions to read/download from this location."
+        )
+    # We currently assume all formats we support have a `config.json`, this is
+    # true, for Keras, Transformers, and timm. We infer the on disk format by
+    # inspecting the `config.json` file.
+    config = load_json(preset, CONFIG_FILE)
+    if "registered_name" in config:
+        # If we see registered_name, we assume a serialized Keras object.
+        return KerasPresetLoader(preset, config)
+    elif "model_type" in config:
+        # Avoid circular import.
+        from keras_hub.src.utils.transformers.preset_loader import (
+            TransformersPresetLoader,
+        )
+
+        # If we see model_type, we assume a Transformers style config.
+        return TransformersPresetLoader(preset, config)
+    elif "architecture" in config:
+        # Avoid circular import.
+        from keras_hub.src.utils.timm.preset_loader import TimmPresetLoader
+
+        # If we see "architecture", we assume a timm config. We could make this
+        # more robust later on if we need to.
+        return TimmPresetLoader(preset, config)
+    else:
+        contents = json.dumps(config, indent=4)
+        raise ValueError(
+            f"Unrecognized format for {CONFIG_FILE} in {preset}. "
+            "Create a preset with the `save_to_preset` utility on KerasHub "
+            f"models. Contents of {CONFIG_FILE}:\n{contents}"
+        )
+
+
+class PresetLoader:
+    def __init__(self, preset, config):
+        self.config = config
+        self.preset = preset
+
+    def check_backbone_class(self):
+        """Infer the backbone architecture."""
+        raise NotImplementedError
+
+    def load_backbone(self, cls, load_weights, **kwargs):
+        """Load the backbone model from the preset."""
+        raise NotImplementedError
+
+    def load_tokenizer(self, cls, **kwargs):
+        """Load a tokenizer layer from the preset."""
+        raise NotImplementedError
+
+    def load_audio_converter(self, cls, **kwargs):
+        """Load an audio converter layer from the preset."""
+        raise NotImplementedError
+
+    def load_image_converter(self, cls, **kwargs):
+        """Load an image converter layer from the preset."""
+        raise NotImplementedError
+
+    def load_task(self, cls, load_weights, load_task_weights, **kwargs):
+        """Load a task model from the preset.
+
+        By default, we create a task from a backbone and preprocessor with
+        default arguments. This means
+        """
+        if "backbone" not in kwargs:
+            backbone_class = cls.backbone_cls
+            # Forward dtype to backbone.
+            backbone_kwargs = {"dtype": kwargs.pop("dtype", None)}
+            kwargs["backbone"] = self.load_backbone(
+                backbone_class, load_weights, **backbone_kwargs
+            )
+        if "preprocessor" not in kwargs and cls.preprocessor_cls:
+            kwargs["preprocessor"] = self.load_preprocessor(
+                cls.preprocessor_cls,
+            )
+        return cls(**kwargs)
+
+    def load_preprocessor(self, cls, **kwargs):
+        """Load a prepocessor layer from the preset.
+
+        By default, we create a preprocessor from a tokenizer with default
+        arguments. This allow us to support transformers checkpoints by
+        only converting the backbone and tokenizer.
+        """
+        if "tokenizer" not in kwargs and cls.tokenizer_cls:
+            kwargs["tokenizer"] = self.load_tokenizer(cls.tokenizer_cls)
+        if "audio_converter" not in kwargs and cls.audio_converter_cls:
+            kwargs["audio_converter"] = self.load_audio_converter(
+                cls.audio_converter_cls
+            )
+        if "image_converter" not in kwargs and cls.image_converter_cls:
+            kwargs["image_converter"] = self.load_image_converter(
+                cls.image_converter_cls
+            )
+        return cls(**kwargs)
+
+
+class KerasPresetLoader(PresetLoader):
+    def check_backbone_class(self):
+        return check_config_class(self.config)
+
+    def load_backbone(self, cls, load_weights, **kwargs):
+        backbone = load_serialized_object(self.config, **kwargs)
+        if load_weights:
+            jax_memory_cleanup(backbone)
+            backbone.load_weights(get_file(self.preset, MODEL_WEIGHTS_FILE))
+        return backbone
+
+    def load_tokenizer(self, cls, **kwargs):
+        tokenizer_config = load_json(self.preset, TOKENIZER_CONFIG_FILE)
+        tokenizer = load_serialized_object(tokenizer_config, **kwargs)
+        tokenizer.load_preset_assets(self.preset)
+        return tokenizer
+
+    def load_audio_converter(self, cls, **kwargs):
+        converter_config = load_json(self.preset, AUDIO_CONVERTER_CONFIG_FILE)
+        return load_serialized_object(converter_config, **kwargs)
+
+    def load_image_converter(self, cls, **kwargs):
+        converter_config = load_json(self.preset, IMAGE_CONVERTER_CONFIG_FILE)
+        return load_serialized_object(converter_config, **kwargs)
+
+    def load_task(self, cls, load_weights, load_task_weights, **kwargs):
+        # If there is no `task.json` or it's for the wrong class delegate to the
+        # super class loader.
+        if not check_file_exists(self.preset, TASK_CONFIG_FILE):
+            return super().load_task(
+                cls, load_weights, load_task_weights, **kwargs
+            )
+        task_config = load_json(self.preset, TASK_CONFIG_FILE)
+        if not issubclass(check_config_class(task_config), cls):
+            return super().load_task(
+                cls, load_weights, load_task_weights, **kwargs
+            )
+        # We found a `task.json` with a complete config for our class.
+        task = load_serialized_object(task_config, **kwargs)
+        if task.preprocessor and task.preprocessor.tokenizer:
+            task.preprocessor.tokenizer.load_preset_assets(self.preset)
+        if load_weights:
+            has_task_weights = check_file_exists(self.preset, TASK_WEIGHTS_FILE)
+            if has_task_weights and load_task_weights:
+                jax_memory_cleanup(task)
+                task_weights = get_file(self.preset, TASK_WEIGHTS_FILE)
+                task.load_task_weights(task_weights)
+            else:
+                jax_memory_cleanup(task.backbone)
+                backbone_weights = get_file(self.preset, MODEL_WEIGHTS_FILE)
+                task.backbone.load_weights(backbone_weights)
+        return task
+
+    def load_preprocessor(self, cls, **kwargs):
+        # If there is no `preprocessing.json` or it's for the wrong class,
+        # delegate to the super class loader.
+        if not check_file_exists(self.preset, PREPROCESSOR_CONFIG_FILE):
+            return super().load_preprocessor(cls, **kwargs)
+        preprocessor_json = load_json(self.preset, PREPROCESSOR_CONFIG_FILE)
+        if not issubclass(check_config_class(preprocessor_json), cls):
+            return super().load_preprocessor(cls, **kwargs)
+        # We found a `preprocessing.json` with a complete config for our class.
+        preprocessor = load_serialized_object(preprocessor_json, **kwargs)
+        preprocessor.tokenizer.load_preset_assets(self.preset)
+        return preprocessor
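The practical upshot of the loader classes above: a single `from_preset` call can dispatch to Keras, Transformers, or timm checkpoints based on which key `config.json` contains. A usage sketch; the preset handles below are examples of each format, not a list of what this release guarantees:

import keras_hub

# "registered_name" in config.json -> KerasPresetLoader.
causal_lm = keras_hub.models.CausalLM.from_preset("gemma_2b_en")

# "model_type" in config.json -> TransformersPresetLoader (converted on load).
mistral_lm = keras_hub.models.CausalLM.from_preset("hf://mistralai/Mistral-7B-v0.1")

# "architecture" in config.json -> TimmPresetLoader.
image_classifier = keras_hub.models.ImageClassifier.from_preset("hf://timm/resnet50.a1_in1k")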