keras-hub-nightly 0.16.1.dev202410020340__py3-none-any.whl → 0.19.0.dev202501260345__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +21 -3
- keras_hub/api/models/__init__.py +71 -12
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/__init__.py +2 -0
- keras_hub/src/bounding_box/converters.py +102 -12
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/reversible_embedding.py +3 -16
- keras_hub/src/layers/modeling/rms_normalization.py +36 -0
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +29 -7
- keras_hub/src/layers/preprocessing/audio_converter.py +3 -7
- keras_hub/src/layers/preprocessing/image_converter.py +170 -34
- keras_hub/src/metrics/bleu.py +4 -3
- keras_hub/src/models/albert/albert_presets.py +4 -12
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/backbone.py +3 -14
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_presets.py +3 -9
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/basnet/__init__.py +5 -0
- keras_hub/src/models/basnet/basnet.py +122 -0
- keras_hub/src/models/basnet/basnet_backbone.py +366 -0
- keras_hub/src/models/basnet/basnet_image_converter.py +8 -0
- keras_hub/src/models/basnet/basnet_preprocessor.py +14 -0
- keras_hub/src/models/basnet/basnet_presets.py +17 -0
- keras_hub/src/models/bert/bert_presets.py +14 -32
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/bloom/bloom_presets.py +8 -24
- keras_hub/src/models/causal_lm.py +56 -12
- keras_hub/src/models/clip/__init__.py +5 -0
- keras_hub/src/models/clip/clip_backbone.py +286 -0
- keras_hub/src/models/clip/clip_encoder_block.py +19 -4
- keras_hub/src/models/clip/clip_image_converter.py +8 -0
- keras_hub/src/models/clip/clip_presets.py +93 -0
- keras_hub/src/models/clip/clip_text_encoder.py +4 -1
- keras_hub/src/models/clip/clip_tokenizer.py +18 -3
- keras_hub/src/models/clip/clip_vision_embedding.py +101 -0
- keras_hub/src/models/clip/clip_vision_encoder.py +159 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +0 -109
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_presets.py +5 -15
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/__init__.py +7 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +200 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py +10 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py +16 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py +215 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +17 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +111 -0
- keras_hub/src/models/densenet/densenet_backbone.py +6 -4
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -129
- keras_hub/src/models/densenet/densenet_image_converter.py +2 -4
- keras_hub/src/models/densenet/densenet_presets.py +9 -15
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +5 -10
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/__init__.py +9 -0
- keras_hub/src/models/efficientnet/cba.py +141 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +160 -61
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +14 -0
- keras_hub/src/models/efficientnet/efficientnet_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/efficientnet/efficientnet_image_converter.py +10 -0
- keras_hub/src/models/efficientnet/efficientnet_presets.py +193 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +84 -41
- keras_hub/src/models/efficientnet/mbconv.py +53 -22
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/electra/electra_presets.py +6 -18
- keras_hub/src/models/f_net/f_net_presets.py +2 -6
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_presets.py +1 -3
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/feature_pyramid_backbone.py +1 -1
- keras_hub/src/models/flux/__init__.py +5 -0
- keras_hub/src/models/flux/flux_layers.py +496 -0
- keras_hub/src/models/flux/flux_maths.py +225 -0
- keras_hub/src/models/flux/flux_model.py +236 -0
- keras_hub/src/models/flux/flux_presets.py +3 -0
- keras_hub/src/models/flux/flux_text_to_image.py +146 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +73 -0
- keras_hub/src/models/gemma/gemma_backbone.py +35 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +29 -63
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt2/gpt2_presets.py +5 -14
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier.py +147 -2
- keras_hub/src/models/image_classifier_preprocessor.py +6 -3
- keras_hub/src/models/image_object_detector.py +87 -0
- keras_hub/src/models/image_object_detector_preprocessor.py +57 -0
- keras_hub/src/models/image_segmenter.py +0 -5
- keras_hub/src/models/image_segmenter_preprocessor.py +29 -4
- keras_hub/src/models/image_to_image.py +417 -0
- keras_hub/src/models/inpaint.py +520 -0
- keras_hub/src/models/llama/llama_backbone.py +138 -12
- keras_hub/src/models/llama/llama_causal_lm.py +3 -1
- keras_hub/src/models/llama/llama_presets.py +10 -20
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/llama3/llama3_presets.py +4 -12
- keras_hub/src/models/llama3/llama3_tokenizer.py +25 -2
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +6 -4
- keras_hub/src/models/mistral/mistral_presets.py +3 -9
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/__init__.py +6 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_backbone.py → mit/mit_backbone.py} +47 -36
- keras_hub/src/models/mit/mit_image_classifier.py +12 -0
- keras_hub/src/models/mit/mit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/mit/mit_image_converter.py +8 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_layers.py → mit/mit_layers.py} +20 -13
- keras_hub/src/models/mit/mit_presets.py +139 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +8 -8
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +0 -92
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/opt/opt_presets.py +4 -12
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +63 -17
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +3 -1
- keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +21 -23
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +2 -4
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +173 -17
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +14 -26
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -1
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_presets.py +2 -6
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +25 -11
- keras_hub/src/models/resnet/resnet_backbone.py +3 -14
- keras_hub/src/models/resnet/resnet_image_classifier.py +0 -137
- keras_hub/src/models/resnet/resnet_image_converter.py +2 -4
- keras_hub/src/models/resnet/resnet_presets.py +127 -18
- keras_hub/src/models/retinanet/__init__.py +5 -0
- keras_hub/src/models/retinanet/anchor_generator.py +52 -53
- keras_hub/src/models/retinanet/feature_pyramid.py +103 -39
- keras_hub/src/models/retinanet/non_max_supression.py +1 -0
- keras_hub/src/models/retinanet/prediction_head.py +192 -0
- keras_hub/src/models/retinanet/retinanet_backbone.py +146 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +53 -0
- keras_hub/src/models/retinanet/retinanet_label_encoder.py +49 -51
- keras_hub/src/models/retinanet/retinanet_object_detector.py +381 -0
- keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/retinanet/retinanet_presets.py +16 -0
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +6 -8
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/__init__.py +5 -0
- keras_hub/src/models/sam/sam_backbone.py +2 -3
- keras_hub/src/models/sam/sam_image_converter.py +2 -4
- keras_hub/src/models/sam/sam_image_segmenter.py +16 -16
- keras_hub/src/models/sam/sam_image_segmenter_preprocessor.py +11 -1
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_presets.py +3 -9
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/__init__.py +8 -0
- keras_hub/src/models/segformer/segformer_backbone.py +167 -0
- keras_hub/src/models/segformer/segformer_image_converter.py +8 -0
- keras_hub/src/models/segformer/segformer_image_segmenter.py +184 -0
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +31 -0
- keras_hub/src/models/segformer/segformer_presets.py +136 -0
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +8 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +577 -190
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +189 -163
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +178 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +193 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +43 -7
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +25 -14
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/t5/t5_backbone.py +5 -4
- keras_hub/src/models/t5/t5_presets.py +47 -19
- keras_hub/src/models/task.py +47 -39
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +106 -41
- keras_hub/src/models/vae/__init__.py +1 -0
- keras_hub/src/models/vae/vae_backbone.py +184 -0
- keras_hub/src/models/vae/vae_layers.py +739 -0
- keras_hub/src/models/vgg/__init__.py +5 -0
- keras_hub/src/models/vgg/vgg_backbone.py +4 -24
- keras_hub/src/models/vgg/vgg_image_classifier.py +139 -33
- keras_hub/src/models/vgg/vgg_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vgg/vgg_image_converter.py +8 -0
- keras_hub/src/models/vgg/vgg_presets.py +48 -0
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +126 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +6 -4
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +2 -4
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/whisper/whisper_presets.py +10 -30
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +2 -6
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +41 -6
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +7 -3
- keras_hub/src/tokenizers/byte_tokenizer.py +3 -10
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +9 -11
- keras_hub/src/tokenizers/tokenizer.py +10 -13
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +9 -7
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +10 -3
- keras_hub/src/utils/keras_utils.py +2 -13
- keras_hub/src/utils/pipeline_model.py +3 -3
- keras_hub/src/utils/preset_utils.py +196 -144
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_densenet.py +6 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +447 -0
- keras_hub/src/utils/timm/convert_resnet.py +1 -1
- keras_hub/src/utils/timm/convert_vgg.py +85 -0
- keras_hub/src/utils/timm/preset_loader.py +14 -9
- keras_hub/src/utils/transformers/convert_llama3.py +21 -5
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/METADATA +86 -68
- keras_hub_nightly-0.19.0.dev202501260345.dist-info/RECORD +423 -0
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/WHEEL +1 -1
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +0 -138
- keras_hub/src/models/mix_transformer/__init__.py +0 -0
- keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +0 -119
- keras_hub/src/models/stable_diffusion_3/vae_image_decoder.py +0 -320
- keras_hub_nightly-0.16.1.dev202410020340.dist-info/RECORD +0 -357
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/top_level.txt +0 -0
keras_hub/src/utils/transformers/convert_vit.py ADDED

```diff
@@ -0,0 +1,150 @@
+import numpy as np
+
+from keras_hub.src.models.vit.vit_backbone import ViTBackbone
+
+backbone_cls = ViTBackbone
+
+
+def convert_backbone_config(transformers_config):
+    image_size = transformers_config["image_size"]
+    return {
+        "image_shape": (image_size, image_size, 3),
+        "patch_size": transformers_config["patch_size"],
+        "num_layers": transformers_config["num_hidden_layers"],
+        "num_heads": transformers_config["num_attention_heads"],
+        "hidden_dim": transformers_config["hidden_size"],
+        "mlp_dim": transformers_config["intermediate_size"],
+        "dropout_rate": transformers_config["hidden_dropout_prob"],
+        "attention_dropout": transformers_config[
+            "attention_probs_dropout_prob"
+        ],
+        "use_mha_bias": transformers_config["qkv_bias"],
+    }
+
+
+def convert_weights(backbone, loader, transformers_config):
+    def port_ln(keras_variable, weight_key):
+        loader.port_weight(keras_variable.gamma, f"{weight_key}.weight")
+        loader.port_weight(keras_variable.beta, f"{weight_key}.bias")
+
+    def port_dense(keras_variable, weight_key):
+        loader.port_weight(
+            keras_variable.kernel,
+            f"{weight_key}.weight",
+            hook_fn=lambda x, _: x.T,
+        )
+        if keras_variable.bias is not None:
+            loader.port_weight(keras_variable.bias, f"{weight_key}.bias")
+
+    def port_mha(keras_variable, weight_key, num_heads, hidden_dim):
+        # query
+        loader.port_weight(
+            keras_variable.query_dense.kernel,
+            f"{weight_key}.attention.query.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.query_dense.bias,
+            f"{weight_key}.attention.query.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # key
+        loader.port_weight(
+            keras_variable.key_dense.kernel,
+            f"{weight_key}.attention.key.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.key_dense.bias,
+            f"{weight_key}.attention.key.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # value
+        loader.port_weight(
+            keras_variable.value_dense.kernel,
+            f"{weight_key}.attention.value.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (hidden_dim, num_heads, hidden_dim // num_heads)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.value_dense.bias,
+            f"{weight_key}.attention.value.bias",
+            hook_fn=lambda x, _: np.reshape(
+                x, (num_heads, hidden_dim // num_heads)
+            ),
+        )
+        # output
+        loader.port_weight(
+            keras_variable.output_dense.kernel,
+            f"{weight_key}.output.dense.weight",
+            hook_fn=lambda x, _: np.reshape(
+                x.T, (num_heads, hidden_dim // num_heads, hidden_dim)
+            ),
+        )
+        loader.port_weight(
+            keras_variable.output_dense.bias, f"{weight_key}.output.dense.bias"
+        )
+
+    loader.port_weight(
+        keras_variable=backbone.layers[1].patch_embedding.kernel,
+        hf_weight_key="vit.embeddings.patch_embeddings.projection.weight",
+        hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)),
+    )
+
+    loader.port_weight(
+        backbone.layers[1].patch_embedding.bias,
+        "vit.embeddings.patch_embeddings.projection.bias",
+    )
+
+    loader.port_weight(
+        backbone.layers[1].class_token,
+        "vit.embeddings.cls_token",
+    )
+
+    loader.port_weight(
+        backbone.layers[1].position_embedding.embeddings,
+        "vit.embeddings.position_embeddings",
+        hook_fn=lambda x, _: x[0],
+    )
+    encoder_layers = backbone.layers[2].encoder_layers
+    for i, encoder_block in enumerate(encoder_layers):
+        prefix = "vit.encoder.layer"
+        num_heads = encoder_block.num_heads
+        hidden_dim = encoder_block.hidden_dim
+
+        port_mha(
+            encoder_block.mha,
+            f"{prefix}.{i}.attention",
+            num_heads,
+            hidden_dim,
+        )
+        port_ln(encoder_block.layer_norm_1, f"{prefix}.{i}.layernorm_before")
+        port_ln(encoder_block.layer_norm_2, f"{prefix}.{i}.layernorm_after")
+
+        port_dense(
+            encoder_block.mlp.dense_1, f"{prefix}.{i}.intermediate.dense"
+        )
+        port_dense(encoder_block.mlp.dense_2, f"{prefix}.{i}.output.dense")
+    port_ln(backbone.layers[2].layer_norm, "vit.layernorm")
+
+
+def convert_head(task, loader, transformers_config):
+    prefix = "classifier."
+    loader.port_weight(
+        task.output_dense.kernel,
+        hf_weight_key=prefix + "weight",
+        hook_fn=lambda x, _: x.T,
+    )
+    loader.port_weight(
+        task.output_dense.bias,
+        hf_weight_key=prefix + "bias",
+    )
```
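The `hook_fn` reshapes above exist because Hugging Face stores each attention projection as a fused `(hidden_dim, hidden_dim)` linear weight, while Keras `MultiHeadAttention` keeps per-head kernels of shape `(hidden_dim, num_heads, head_dim)`. Below is a minimal numpy sketch of that mapping; the ViT-Base dimensions are illustrative and not taken from the diff.

```python
import numpy as np

# Illustrative ViT-Base dimensions (assumed, not part of the diff above).
hidden_dim, num_heads = 768, 12
head_dim = hidden_dim // num_heads  # 64

# Hugging Face nn.Linear weights are stored as (out_features, in_features).
hf_query_weight = np.random.rand(hidden_dim, hidden_dim).astype("float32")
hf_query_bias = np.random.rand(hidden_dim).astype("float32")

# Same arithmetic as the converter's hook_fn: transpose to (in, out), then
# split the output dimension into (num_heads, head_dim).
keras_query_kernel = np.reshape(
    hf_query_weight.T, (hidden_dim, num_heads, head_dim)
)
keras_query_bias = np.reshape(hf_query_bias, (num_heads, head_dim))

print(keras_query_kernel.shape)  # (768, 12, 64)
print(keras_query_bias.shape)  # (12, 64)
```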
keras_hub/src/utils/transformers/preset_loader.py CHANGED

```diff
@@ -1,5 +1,6 @@
 """Convert huggingface models to KerasHub."""
 
+from keras_hub.src.models.image_classifier import ImageClassifier
 from keras_hub.src.utils.preset_utils import PresetLoader
 from keras_hub.src.utils.preset_utils import jax_memory_cleanup
 from keras_hub.src.utils.transformers import convert_albert
@@ -11,6 +12,7 @@ from keras_hub.src.utils.transformers import convert_gpt2
 from keras_hub.src.utils.transformers import convert_llama3
 from keras_hub.src.utils.transformers import convert_mistral
 from keras_hub.src.utils.transformers import convert_pali_gemma
+from keras_hub.src.utils.transformers import convert_vit
 from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader
 
 
@@ -37,6 +39,8 @@ class TransformersPresetLoader(PresetLoader):
             self.converter = convert_mistral
         elif model_type == "paligemma":
             self.converter = convert_pali_gemma
+        elif model_type == "vit":
+            self.converter = convert_vit
         else:
             raise ValueError(
                 "KerasHub has no converter for huggingface/transformers models "
@@ -55,6 +59,25 @@ class TransformersPresetLoader(PresetLoader):
         self.converter.convert_weights(backbone, loader, self.config)
         return backbone
 
+    def load_task(self, cls, load_weights, load_task_weights, **kwargs):
+        architecture = self.config["architectures"][0]
+        if (
+            not load_task_weights
+            or not issubclass(cls, ImageClassifier)
+            or architecture == "ViTModel"
+        ):
+            return super().load_task(
+                cls, load_weights, load_task_weights, **kwargs
+            )
+        # Support loading the classification head for classifier models.
+        if architecture == "ViTForImageClassification":
+            kwargs["num_classes"] = len(self.config["id2label"])
+        task = super().load_task(cls, load_weights, load_task_weights, **kwargs)
+        if load_task_weights:
+            with SafetensorLoader(self.preset, prefix="") as loader:
+                self.converter.convert_head(task, loader, self.config)
+        return task
+
     def load_tokenizer(self, cls, config_name="tokenizer.json", **kwargs):
         return self.converter.convert_tokenizer(cls, self.preset, **kwargs)
 
```
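With the converter registered for `model_type == "vit"` and the `load_task` override above, a Hugging Face ViT checkpoint should be loadable through KerasHub's `hf://` preset scheme. A hedged sketch of that flow follows; the checkpoint names are illustrative and assume safetensors weights are published for them.

```python
import keras_hub

# For a "ViTForImageClassification" config, the new load_task infers
# num_classes from id2label and ports the classifier head via convert_head.
classifier = keras_hub.models.ImageClassifier.from_preset(
    "hf://google/vit-base-patch16-224"
)

# For a bare "ViTModel" architecture, only the backbone weights are converted.
backbone = keras_hub.models.Backbone.from_preset(
    "hf://google/vit-base-patch16-224-in21k"
)
```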
keras_hub/src/utils/transformers/safetensor_utils.py CHANGED

```diff
@@ -42,12 +42,13 @@ class SafetensorLoader(contextlib.ExitStack):
         """
         Determine and return a prefixed key for a given hf weight key.
 
-        This method checks if there's a common prefix for the weight keys and
-        for future use.
+        This method checks if there's a common prefix for the weight keys and
+        caches it for future use.
 
         Args:
             hf_weight_key (str): The hf weight key to check for a prefix.
-            dict_like (object): An object to get keys of safetensor file using
+            dict_like (object): An object to get keys of safetensor file using
+                keys() method.
 
         Returns:
             str: The full key including the prefix (if any).
```
keras_hub/src/version_utils.py CHANGED
{keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/METADATA CHANGED

````diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: keras-hub-nightly
-Version: 0.16.1.dev202410020340
+Version: 0.19.0.dev202501260345
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-hub
 Author: Keras team
@@ -20,129 +20,147 @@ Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
+Requires-Dist: keras>=3.5
 Requires-Dist: absl-py
 Requires-Dist: numpy
 Requires-Dist: packaging
 Requires-Dist: regex
 Requires-Dist: rich
 Requires-Dist: kagglehub
-Requires-Dist: tensorflow-text
+Requires-Dist: tensorflow-text
 Provides-Extra: extras
-Requires-Dist: rouge-score
-Requires-Dist: sentencepiece
-
-
+Requires-Dist: rouge-score; extra == "extras"
+Requires-Dist: sentencepiece; extra == "extras"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# KerasHub: Multi-framework Pretrained Models
 [](https://github.com/keras-team/keras-hub/actions?query=workflow%3ATests+branch%3Amaster)
 
 [](https://github.com/keras-team/keras-hub/issues)
 
 > [!IMPORTANT]
-> 📢 KerasNLP is
+> 📢 KerasNLP is now KerasHub! 📢 Read
 > [the announcement](https://github.com/keras-team/keras-hub/issues/1831).
-
-
-
-
-
-
-
-
-
-
-
-This library is an extension of the core Keras API; all high-level modules are
-Layers and Models that receive that same level of polish as core Keras.
-If you are familiar with Keras, congratulations! You already understand most of
-KerasHub.
+
+**KerasHub** is a pretrained modeling library that aims to be simple, flexible,
+and fast. The library provides [Keras 3](https://keras.io/keras_3/)
+implementations of popular model architectures, paired with a collection of
+pretrained checkpoints available on [Kaggle Models](https://kaggle.com/models/).
+Models can be used with text, image, and audio data for generation, classification,
+and many other built in tasks.
+
+KerasHub is an extension of the core Keras API; KerasHub components are provided
+as `Layer` and `Model` implementations. If you are familiar with Keras,
+congratulations! You already understand most of KerasHub.
 
 All models support JAX, TensorFlow, and PyTorch from a single model
 definition and can be fine-tuned on GPUs and TPUs out of the box. Models can
 be trained on individual accelerators with built-in PEFT techniques, or
 fine-tuned at scale with model and data parallel training. See our
-[Getting Started guide](https://keras.io/guides/
-to start learning our API.
-[Kaggle](https://www.kaggle.com/organizations/keras/models).
-We welcome contributions.
+[Getting Started guide](https://keras.io/guides/keras_hub/getting_started)
+to start learning our API.
 
 ## Quick Links
 
 ### For everyone
 
-- [Home
-- [
-- [
-- [
+- [Home page](https://keras.io/keras_hub)
+- [Getting started](https://keras.io/keras_hub/getting_started)
+- [Guides](https://keras.io/keras_hub/guides)
+- [API documentation](https://keras.io/keras_hub/api)
+- [Pre-trained models](https://keras.io/keras_hub/presets/)
 
 ### For contributors
 
+- [Call for Contributions](https://github.com/keras-team/keras-hub/issues/1835)
+- [Roadmap](https://github.com/keras-team/keras-hub/issues/1836)
 - [Contributing Guide](CONTRIBUTING.md)
-- [Roadmap](ROADMAP.md)
 - [Style Guide](STYLE_GUIDE.md)
 - [API Design Guide](API_DESIGN_GUIDE.md)
-- [Call for Contributions](https://github.com/keras-team/keras-hub/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22)
 
 ## Quickstart
 
-
+Choose a backend:
 
 ```python
 import os
 os.environ["KERAS_BACKEND"] = "jax"  # Or "tensorflow" or "torch"!
+```
 
-
+Import KerasHub and other libraries:
+
+```python
+import keras
+import keras_hub
+import numpy as np
 import tensorflow_datasets as tfds
+```
+
+Load a resnet model and use it to predict a label for an image:
+
+```python
+classifier = keras_hub.models.ImageClassifier.from_preset(
+    "resnet_50_imagenet",
+    activation="softmax",
+)
+url = "https://upload.wikimedia.org/wikipedia/commons/a/aa/California_quail.jpg"
+path = keras.utils.get_file(origin=url)
+image = keras.utils.load_img(path)
+preds = classifier.predict(np.array([image]))
+print(keras_hub.utils.decode_imagenet_predictions(preds))
+```
+
+Load a Bert model and fine-tune it on IMDb movie reviews:
 
+```python
+classifier = keras_hub.models.BertClassifier.from_preset(
+    "bert_base_en_uncased",
+    activation="softmax",
+    num_classes=2,
+)
 imdb_train, imdb_test = tfds.load(
     "imdb_reviews",
     split=["train", "test"],
     as_supervised=True,
     batch_size=16,
 )
-
-# Load a BERT model.
-classifier = keras_nlp.models.Classifier.from_preset(
-    "bert_base_en",
-    num_classes=2,
-    activation="softmax",
-)
-
-# Fine-tune on IMDb movie reviews.
 classifier.fit(imdb_train, validation_data=imdb_test)
-
-
+preds = classifier.predict(["What an amazing movie!", "A total waste of time."])
+print(preds)
 ```
 
-Try it out [in a colab](https://colab.research.google.com/gist/mattdangerw/e457e42d5ea827110c8d5cb4eb9d9a07/kerasnlp-quickstart.ipynb).
-For more in depth guides and examples, visit
-[keras.io/keras_nlp](https://keras.io/keras_nlp/).
-
 ## Installation
 
-
-introduce breaking changes to the API in future versions. For a stable and
-supported experience, we recommend installing `keras-nlp` version 0.15.1:
+To install the latest KerasHub release with Keras 3, simply run:
 
-```
-pip install keras-
+```
+pip install --upgrade keras-hub
 ```
 
-To
-our nightly package
+To install the latest nightly changes for both KerasHub and Keras, you can use
+our nightly package.
 
-```
-pip install keras-hub-nightly
+```
+pip install --upgrade keras-hub-nightly
 ```
 
-KerasHub
-`tf.data` API for preprocessing.
-
-
-Read [Getting started with Keras](https://keras.io/getting_started/) for more
-information on installing Keras 3 and compatibility with different frameworks.
+Currently, installing KerasHub will always pull in TensorFlow for use of the
+`tf.data` API for preprocessing. When pre-processing with `tf.data`, training
+can still happen on any backend.
 
-
-
-
+Visit the [core Keras getting started page](https://keras.io/getting_started/)
+for more information on installing Keras 3, accelerator support, and
+compatibility with different frameworks.
 
 ## Configuring your backend
 
````
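The README note above about preprocessing with `tf.data` while training on any backend can be illustrated with a short sketch. The preset name follows the quickstart; the preprocessor class name and the exact pipeline wiring are assumptions, not taken from the diff.

```python
import os

os.environ["KERAS_BACKEND"] = "jax"  # Training backend; tf.data still feeds the input.

import keras_hub
import tensorflow as tf

# Assumed preprocessor class: tokenization runs inside the tf.data pipeline on
# CPU, while the model itself trains on the JAX backend.
preprocessor = keras_hub.models.BertTextClassifierPreprocessor.from_preset(
    "bert_base_en_uncased"
)
ds = tf.data.Dataset.from_tensor_slices(
    (["What an amazing movie!", "A total waste of time."], [1, 0])
)
ds = ds.batch(2).map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)

# Disable the built-in preprocessor since the dataset is already tokenized.
classifier = keras_hub.models.BertClassifier.from_preset(
    "bert_base_en_uncased", num_classes=2, preprocessor=None
)
classifier.fit(ds)
```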