keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/__init__.py +0 -6
- keras_hub/api/__init__.py +2 -0
- keras_hub/api/bounding_box/__init__.py +36 -0
- keras_hub/api/layers/__init__.py +14 -0
- keras_hub/api/models/__init__.py +97 -48
- keras_hub/api/tokenizers/__init__.py +30 -0
- keras_hub/api/utils/__init__.py +22 -0
- keras_hub/src/api_export.py +15 -9
- keras_hub/src/bounding_box/__init__.py +13 -0
- keras_hub/src/bounding_box/converters.py +529 -0
- keras_hub/src/bounding_box/formats.py +162 -0
- keras_hub/src/bounding_box/iou.py +263 -0
- keras_hub/src/bounding_box/to_dense.py +95 -0
- keras_hub/src/bounding_box/to_ragged.py +99 -0
- keras_hub/src/bounding_box/utils.py +194 -0
- keras_hub/src/bounding_box/validate_format.py +99 -0
- keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
- keras_hub/src/layers/preprocessing/image_converter.py +130 -0
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
- keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
- keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
- keras_hub/src/layers/preprocessing/random_swap.py +33 -31
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
- keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
- keras_hub/src/models/albert/__init__.py +1 -2
- keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
- keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
- keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/albert/albert_tokenizer.py +17 -36
- keras_hub/src/models/backbone.py +12 -34
- keras_hub/src/models/bart/__init__.py +1 -2
- keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
- keras_hub/src/models/bart/bart_tokenizer.py +12 -39
- keras_hub/src/models/bert/__init__.py +1 -5
- keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
- keras_hub/src/models/bert/bert_presets.py +1 -4
- keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
- keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/bert/bert_tokenizer.py +17 -35
- keras_hub/src/models/bloom/__init__.py +1 -2
- keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
- keras_hub/src/models/causal_lm.py +10 -29
- keras_hub/src/models/causal_lm_preprocessor.py +195 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
- keras_hub/src/models/deberta_v3/__init__.py +1 -4
- keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
- keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
- keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
- keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
- keras_hub/src/models/densenet/densenet_backbone.py +46 -22
- keras_hub/src/models/distil_bert/__init__.py +1 -4
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
- keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
- keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
- keras_hub/src/models/efficientnet/__init__.py +13 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
- keras_hub/src/models/efficientnet/mbconv.py +238 -0
- keras_hub/src/models/electra/__init__.py +1 -2
- keras_hub/src/models/electra/electra_tokenizer.py +17 -32
- keras_hub/src/models/f_net/__init__.py +1 -2
- keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
- keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
- keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
- keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
- keras_hub/src/models/falcon/__init__.py +1 -2
- keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
- keras_hub/src/models/gemma/__init__.py +1 -2
- keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
- keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
- keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
- keras_hub/src/models/gpt2/__init__.py +1 -2
- keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
- keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
- keras_hub/src/models/image_classifier.py +0 -5
- keras_hub/src/models/image_classifier_preprocessor.py +83 -0
- keras_hub/src/models/llama/__init__.py +1 -2
- keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
- keras_hub/src/models/llama/llama_tokenizer.py +12 -25
- keras_hub/src/models/llama3/__init__.py +1 -2
- keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
- keras_hub/src/models/masked_lm.py +0 -2
- keras_hub/src/models/masked_lm_preprocessor.py +156 -0
- keras_hub/src/models/mistral/__init__.py +1 -2
- keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
- keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
- keras_hub/src/models/mobilenet/__init__.py +13 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
- keras_hub/src/models/opt/__init__.py +1 -2
- keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
- keras_hub/src/models/opt/opt_tokenizer.py +12 -41
- keras_hub/src/models/pali_gemma/__init__.py +1 -4
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
- keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
- keras_hub/src/models/phi3/__init__.py +1 -2
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
- keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
- keras_hub/src/models/preprocessor.py +72 -83
- keras_hub/src/models/resnet/__init__.py +6 -0
- keras_hub/src/models/resnet/resnet_backbone.py +390 -42
- keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
- keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
- keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
- keras_hub/src/models/resnet/resnet_presets.py +95 -0
- keras_hub/src/models/retinanet/__init__.py +13 -0
- keras_hub/src/models/retinanet/anchor_generator.py +175 -0
- keras_hub/src/models/retinanet/box_matcher.py +259 -0
- keras_hub/src/models/retinanet/non_max_supression.py +578 -0
- keras_hub/src/models/roberta/__init__.py +1 -2
- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
- keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
- keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
- keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
- keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
- keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
- keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
- keras_hub/src/models/t5/__init__.py +1 -2
- keras_hub/src/models/t5/t5_tokenizer.py +13 -23
- keras_hub/src/models/task.py +71 -116
- keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
- keras_hub/src/models/text_classifier_preprocessor.py +138 -0
- keras_hub/src/models/whisper/__init__.py +1 -2
- keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
- keras_hub/src/models/whisper/whisper_backbone.py +0 -3
- keras_hub/src/models/whisper/whisper_presets.py +10 -10
- keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
- keras_hub/src/models/xlm_roberta/__init__.py +1 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
- keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
- keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
- keras_hub/src/tests/test_case.py +46 -0
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
- keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
- keras_hub/src/tokenizers/tokenizer.py +67 -32
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
- keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
- keras_hub/src/utils/imagenet/__init__.py +13 -0
- keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
- keras_hub/src/utils/keras_utils.py +0 -50
- keras_hub/src/utils/preset_utils.py +230 -68
- keras_hub/src/utils/tensor_utils.py +187 -69
- keras_hub/src/utils/timm/convert_resnet.py +19 -16
- keras_hub/src/utils/timm/preset_loader.py +66 -0
- keras_hub/src/utils/transformers/convert_albert.py +193 -0
- keras_hub/src/utils/transformers/convert_bart.py +373 -0
- keras_hub/src/utils/transformers/convert_bert.py +7 -17
- keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
- keras_hub/src/utils/transformers/convert_gemma.py +5 -19
- keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
- keras_hub/src/utils/transformers/convert_llama3.py +7 -18
- keras_hub/src/utils/transformers/convert_mistral.py +129 -0
- keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
- keras_hub/src/utils/transformers/preset_loader.py +77 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
- keras_hub/src/version_utils.py +1 -1
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
- keras_hub/src/models/bart/bart_preprocessor.py +0 -276
- keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
- keras_hub/src/models/electra/electra_preprocessor.py +0 -154
- keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
- keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
- keras_hub/src/models/llama/llama_preprocessor.py +0 -189
- keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
- keras_hub/src/models/opt/opt_preprocessor.py +0 -188
- keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
- keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
- keras_hub/src/utils/timm/convert.py +0 -37
- keras_hub/src/utils/transformers/convert.py +0 -101
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
keras_hub/src/utils/transformers/convert_mistral.py
@@ -0,0 +1,129 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+
+from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
+from keras_hub.src.utils.preset_utils import get_file
+
+backbone_cls = MistralBackbone
+
+
+def convert_backbone_config(transformers_config):
+    return {
+        "vocabulary_size": transformers_config["vocab_size"],
+        "num_layers": transformers_config["num_hidden_layers"],
+        "num_query_heads": transformers_config["num_attention_heads"],
+        "hidden_dim": transformers_config["hidden_size"],
+        "intermediate_dim": transformers_config["intermediate_size"],
+        "num_key_value_heads": transformers_config["num_key_value_heads"],
+        "rope_max_wavelength": transformers_config["rope_theta"],
+        "layer_norm_epsilon": transformers_config["rms_norm_eps"],
+        "sliding_window": transformers_config["sliding_window"],
+    }
+
+
+def convert_weights(backbone, loader, transformers_config):
+    # Embeddings
+    loader.port_weight(
+        keras_variable=backbone.token_embedding.embeddings,
+        hf_weight_key="model.embed_tokens.weight",
+        hook_fn=lambda hf_tensor, _: hf_tensor.astype(np.float16),
+    )
+    loader.port_weight(
+        keras_variable=backbone.token_embedding.reverse_embeddings,
+        hf_weight_key="lm_head.weight",
+        hook_fn=lambda hf_tensor, _: np.transpose(
+            hf_tensor.astype(np.float16), axes=(1, 0)
+        ),
+    )
+
+    # Attention blocks
+    for index in range(backbone.num_layers):
+        decoder_layer = backbone.transformer_layers[index]
+
+        # Norm layers
+        loader.port_weight(
+            keras_variable=decoder_layer._self_attention_layernorm.scale,
+            hf_weight_key=f"model.layers.{index}.input_layernorm.weight",
+            hook_fn=lambda hf_tensor, _: hf_tensor.astype(np.float16),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._feedforward_layernorm.scale,
+            hf_weight_key=f"model.layers.{index}.post_attention_layernorm.weight",
+            hook_fn=lambda hf_tensor, _: hf_tensor.astype(np.float16),
+        )
+
+        # Attention layers
+        loader.port_weight(
+            keras_variable=decoder_layer._self_attention_layer._query_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.self_attn.q_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.reshape(
+                np.transpose(hf_tensor.astype(np.float16)), keras_shape
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._self_attention_layer._key_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.self_attn.k_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.reshape(
+                np.transpose(hf_tensor.astype(np.float16)), keras_shape
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._self_attention_layer._value_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.self_attn.v_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.reshape(
+                np.transpose(hf_tensor.astype(np.float16)), keras_shape
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._self_attention_layer._output_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.self_attn.o_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.reshape(
+                np.transpose(hf_tensor.astype(np.float16)), keras_shape
+            ),
+        )
+
+        # MLP layers
+        loader.port_weight(
+            keras_variable=decoder_layer._feedforward_gate_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.mlp.gate_proj.weight",
+            hook_fn=lambda hf_tensor, _: np.transpose(
+                hf_tensor.astype(np.float16), axes=(1, 0)
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._feedforward_intermediate_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.mlp.up_proj.weight",
+            hook_fn=lambda hf_tensor, _: np.transpose(
+                hf_tensor.astype(np.float16), axes=(1, 0)
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer._feedforward_output_dense.kernel,
+            hf_weight_key=f"model.layers.{index}.mlp.down_proj.weight",
+            hook_fn=lambda hf_tensor, _: np.transpose(
+                hf_tensor.astype(np.float16), axes=(1, 0)
+            ),
+        )
+
+    # Normalization
+    loader.port_weight(
+        keras_variable=backbone.layer_norm.scale,
+        hf_weight_key="model.norm.weight",
+        hook_fn=lambda hf_tensor, _: hf_tensor.astype(np.float16),
+    )
+
+
+def convert_tokenizer(cls, preset, **kwargs):
+    return cls(get_file(preset, "tokenizer.model"), **kwargs)
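For orientation, here is a minimal sketch (not part of the diff) of how the new converter module maps a Hugging Face Mistral `config.json` to KerasHub backbone arguments. The config values below are illustrative placeholders, and the example assumes this nightly build of keras-hub is installed so that the private module path above is importable.

from keras_hub.src.utils.transformers import convert_mistral

# Illustrative Hugging Face style config; the keys are the ones read by
# convert_backbone_config above, the values are placeholders.
hf_config = {
    "vocab_size": 32000,
    "num_hidden_layers": 32,
    "num_attention_heads": 32,
    "hidden_size": 4096,
    "intermediate_size": 14336,
    "num_key_value_heads": 8,
    "rope_theta": 10000.0,
    "rms_norm_eps": 1e-05,
    "sliding_window": 4096,
}

# Prints KerasHub kwargs such as vocabulary_size, num_layers, num_query_heads, ...
print(convert_mistral.convert_backbone_config(hf_config))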
keras_hub/src/utils/transformers/convert_pali_gemma.py
@@ -13,11 +13,12 @@
 # limitations under the License.
 import numpy as np
 
-from keras_hub.src.
+from keras_hub.src.models.pali_gemma.pali_gemma_backbone import (
+    PaliGemmaBackbone,
+)
 from keras_hub.src.utils.preset_utils import get_file
-
-
-from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader
+
+backbone_cls = PaliGemmaBackbone
 
 
 def convert_backbone_config(transformers_config):
@@ -275,29 +276,6 @@ def convert_weights(backbone, loader, transformers_config):
         hook_fn=lambda hf_tensor, keras_shape: hf_tensor[: keras_shape[0]],
     )
 
-    return backbone
-
-
-def load_pali_gemma_backbone(cls, preset, load_weights):
-    transformers_config = load_config(preset, HF_CONFIG_FILE)
-    keras_config = convert_backbone_config(transformers_config)
-    backbone = cls(**keras_config)
-    if load_weights:
-        jax_memory_cleanup(backbone)
-        with SafetensorLoader(preset) as loader:
-            convert_weights(backbone, loader, transformers_config)
-    return backbone
-
-
-def load_pali_gemma_tokenizer(cls, preset):
-    """
-    Load the Gemma tokenizer.
-
-    Args:
-        cls (class): Tokenizer class.
-        preset (str): Preset configuration name.
 
-
-
-    """
-    return cls(get_file(preset, "tokenizer.model"))
+def convert_tokenizer(cls, preset, **kwargs):
+    return cls(get_file(preset, "tokenizer.model"), **kwargs)
keras_hub/src/utils/transformers/preset_loader.py
@@ -0,0 +1,77 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Convert huggingface models to KerasHub."""
+
+
+from keras_hub.src.utils.preset_utils import PresetLoader
+from keras_hub.src.utils.preset_utils import jax_memory_cleanup
+from keras_hub.src.utils.transformers import convert_albert
+from keras_hub.src.utils.transformers import convert_bart
+from keras_hub.src.utils.transformers import convert_bert
+from keras_hub.src.utils.transformers import convert_distilbert
+from keras_hub.src.utils.transformers import convert_gemma
+from keras_hub.src.utils.transformers import convert_gpt2
+from keras_hub.src.utils.transformers import convert_llama3
+from keras_hub.src.utils.transformers import convert_mistral
+from keras_hub.src.utils.transformers import convert_pali_gemma
+from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader
+
+
+class TransformersPresetLoader(PresetLoader):
+    def __init__(self, preset, config):
+        super().__init__(preset, config)
+        model_type = self.config["model_type"]
+        if model_type == "albert":
+            self.converter = convert_albert
+        elif model_type == "bart":
+            self.converter = convert_bart
+        elif model_type == "bert":
+            self.converter = convert_bert
+        elif model_type == "distilbert":
+            self.converter = convert_distilbert
+        elif model_type == "gemma" or model_type == "gemma2":
+            self.converter = convert_gemma
+        elif model_type == "gpt2":
+            self.converter = convert_gpt2
+        elif model_type == "llama":
+            # TODO: handle other llama versions.
+            self.converter = convert_llama3
+        elif model_type == "mistral":
+            self.converter = convert_mistral
+        elif model_type == "paligemma":
+            self.converter = convert_pali_gemma
+        else:
+            raise ValueError(
+                "KerasHub has no converter for huggingface/transformers models "
+                f"with model type `'{model_type}'`."
+            )
+
+    def check_backbone_class(self):
+        return self.converter.backbone_cls
+
+    def load_backbone(self, cls, load_weights, **kwargs):
+        keras_config = self.converter.convert_backbone_config(self.config)
+        backbone = cls(**{**keras_config, **kwargs})
+        if load_weights:
+            jax_memory_cleanup(backbone)
+            with SafetensorLoader(self.preset) as loader:
+                self.converter.convert_weights(backbone, loader, self.config)
+        return backbone
+
+    def load_tokenizer(self, cls, **kwargs):
+        return self.converter.convert_tokenizer(cls, self.preset, **kwargs)
+
+    def load_image_converter(self, cls, **kwargs):
+        # TODO: set image size for pali gemma checkpoints.
+        return None
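The loader above dispatches to one converter module per supported architecture. Inferred from the calls it makes (`backbone_cls`, `convert_backbone_config`, `convert_weights`, `convert_tokenizer`), a hypothetical converter module would have roughly this shape; the names and bodies below are placeholders, not code from the diff.

# Hypothetical skeleton of a converter module as consumed by
# TransformersPresetLoader; "SomeBackbone" is a placeholder, not real keras-hub code.

backbone_cls = None  # e.g. SomeBackbone, the KerasHub backbone class to build

def convert_backbone_config(transformers_config):
    # Map the Hugging Face config.json dict to KerasHub backbone kwargs.
    return {}

def convert_weights(backbone, loader, transformers_config):
    # Port each checkpoint tensor onto the backbone via loader.port_weight(...).
    pass

def convert_tokenizer(cls, preset, **kwargs):
    # Construct the KerasHub tokenizer from the preset's tokenizer assets.
    return cls(**kwargs)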
keras_hub/src/utils/transformers/safetensor_utils.py
@@ -17,7 +17,7 @@ from keras_hub.src.utils.preset_utils import SAFETENSOR_CONFIG_FILE
 from keras_hub.src.utils.preset_utils import SAFETENSOR_FILE
 from keras_hub.src.utils.preset_utils import check_file_exists
 from keras_hub.src.utils.preset_utils import get_file
-from keras_hub.src.utils.preset_utils import
+from keras_hub.src.utils.preset_utils import load_json
 
 try:
     import safetensors
@@ -38,7 +38,7 @@ class SafetensorLoader(contextlib.ExitStack):
 
         self.preset = preset
         if check_file_exists(preset, SAFETENSOR_CONFIG_FILE):
-            self.safetensor_config =
+            self.safetensor_config = load_json(preset, SAFETENSOR_CONFIG_FILE)
         else:
            self.safetensor_config = None
         self.safetensor_files = {}
keras_hub/src/version_utils.py
CHANGED
keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA
@@ -0,0 +1,202 @@
+Metadata-Version: 2.1
+Name: keras_hub-nightly
+Version: 0.16.0.dev2024092017
+Summary: Industry-strength Natural Language Processing extensions for Keras.
+Home-page: https://github.com/keras-team/keras-hub
+Author: Keras team
+Author-email: keras-hub@google.com
+License: Apache License 2.0
+Classifier: Development Status :: 3 - Alpha
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Operating System :: Unix
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: MacOS
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Software Development
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: absl-py
+Requires-Dist: numpy
+Requires-Dist: packaging
+Requires-Dist: regex
+Requires-Dist: rich
+Requires-Dist: kagglehub
+Requires-Dist: tensorflow-text ; platform_system != "Darwin"
+Provides-Extra: extras
+Requires-Dist: rouge-score ; extra == 'extras'
+Requires-Dist: sentencepiece ; extra == 'extras'
+
+# KerasNLP: Multi-framework NLP Models
+[](https://github.com/keras-team/keras-hub/actions?query=workflow%3ATests+branch%3Amaster)
+
+[](https://github.com/keras-team/keras-hub/issues)
+
+> [!IMPORTANT]
+> 📢 KerasNLP is becoming KerasHub! 📢 Read
+> [the announcement](https://github.com/keras-team/keras-hub/issues/1831).
+>
+> We have renamed the repo to KerasHub in preparation for the release, but have not yet
+> released the new package. Follow the announcement for news.
+
+KerasNLP is a natural language processing library that works natively
+with TensorFlow, JAX, or PyTorch. KerasNLP provides a repository of pre-trained
+models and a collection of lower-level building blocks for language modeling.
+Built on Keras 3, models can be trained and serialized in any framework
+and re-used in another without costly migrations.
+
+This library is an extension of the core Keras API; all high-level modules are
+Layers and Models that receive that same level of polish as core Keras.
+If you are familiar with Keras, congratulations! You already understand most of
+KerasNLP.
+
+All models support JAX, TensorFlow, and PyTorch from a single model
+definition and can be fine-tuned on GPUs and TPUs out of the box. Models can
+be trained on individual accelerators with built-in PEFT techniques, or
+fine-tuned at scale with model and data parallel training. See our
+[Getting Started guide](https://keras.io/guides/keras_nlp/getting_started)
+to start learning our API. Browse our models on
+[Kaggle](https://www.kaggle.com/organizations/keras/models).
+We welcome contributions.
+
+## Quick Links
+
+### For everyone
+
+- [Home Page](https://keras.io/keras_nlp)
+- [Developer Guides](https://keras.io/guides/keras_nlp)
+- [API Reference](https://keras.io/api/keras_nlp)
+- [Pre-trained Models](https://www.kaggle.com/organizations/keras/models)
+
+### For contributors
+
+- [Contributing Guide](CONTRIBUTING.md)
+- [Roadmap](ROADMAP.md)
+- [Style Guide](STYLE_GUIDE.md)
+- [API Design Guide](API_DESIGN_GUIDE.md)
+- [Call for Contributions](https://github.com/keras-team/keras-hub/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22)
+
+## Quickstart
+
+Fine-tune BERT on IMDb movie reviews:
+
+```python
+import os
+os.environ["KERAS_BACKEND"] = "jax"  # Or "tensorflow" or "torch"!
+
+import keras_nlp
+import tensorflow_datasets as tfds
+
+imdb_train, imdb_test = tfds.load(
+    "imdb_reviews",
+    split=["train", "test"],
+    as_supervised=True,
+    batch_size=16,
+)
+# Load a BERT model.
+classifier = keras_nlp.models.Classifier.from_preset(
+    "bert_base_en",
+    num_classes=2,
+    activation="softmax",
+)
+# Fine-tune on IMDb movie reviews.
+classifier.fit(imdb_train, validation_data=imdb_test)
+# Predict two new examples.
+classifier.predict(["What an amazing movie!", "A total waste of my time."])
+```
+
+Try it out [in a colab](https://colab.research.google.com/gist/mattdangerw/e457e42d5ea827110c8d5cb4eb9d9a07/kerasnlp-quickstart.ipynb).
+For more in depth guides and examples, visit
+[keras.io/keras_nlp](https://keras.io/keras_nlp/).
+
+## Installation
+
+To install the latest KerasNLP release with Keras 3, simply run:
+
+```
+pip install --upgrade keras-nlp
+```
+
+To install the latest nightly changes for both KerasNLP and Keras, you can use
+our nightly package.
+
+```
+pip install --upgrade keras-nlp-nightly
+```
+
+Note that currently, installing KerasNLP will always pull in TensorFlow for use
+of the `tf.data` API for preprocessing. Even when pre-processing with `tf.data`,
+training can still happen on any backend.
+
+Read [Getting started with Keras](https://keras.io/getting_started/) for more
+information on installing Keras 3 and compatibility with different frameworks.
+
+> [!IMPORTANT]
+> We recommend using KerasNLP with TensorFlow 2.16 or later, as TF 2.16 packages
+> Keras 3 by default.
+
+## Configuring your backend
+
+If you have Keras 3 installed in your environment (see installation above),
+you can use KerasNLP with any of JAX, TensorFlow and PyTorch. To do so, set the
+`KERAS_BACKEND` environment variable. For example:
+
+```shell
+export KERAS_BACKEND=jax
+```
+
+Or in Colab, with:
+
+```python
+import os
+os.environ["KERAS_BACKEND"] = "jax"
+
+import keras_nlp
+```
+
+> [!IMPORTANT]
+> Make sure to set the `KERAS_BACKEND` before import any Keras libraries, it
+> will be used to set up Keras when it is first imported.
+
+## Compatibility
+
+We follow [Semantic Versioning](https://semver.org/), and plan to
+provide backwards compatibility guarantees both for code and saved models built
+with our components. While we continue with pre-release `0.y.z` development, we
+may break compatibility at any time and APIs should not be consider stable.
+
+## Disclaimer
+
+KerasNLP provides access to pre-trained models via the `keras_nlp.models` API.
+These pre-trained models are provided on an "as is" basis, without warranties
+or conditions of any kind. The following underlying models are provided by third
+parties, and subject to separate licenses:
+BART, BLOOM, DeBERTa, DistilBERT, GPT-2, Llama, Mistral, OPT, RoBERTa, Whisper,
+and XLM-RoBERTa.
+
+## Citing KerasNLP
+
+If KerasNLP helps your research, we appreciate your citations.
+Here is the BibTeX entry:
+
+```bibtex
+@misc{kerasnlp2022,
+  title={KerasNLP},
+  author={Watson, Matthew, and Qian, Chen, and Bischof, Jonathan and Chollet,
+  Fran\c{c}ois and others},
+  year={2022},
+  howpublished={\url{https://github.com/keras-team/keras-hub}},
+}
+```
+
+## Acknowledgements
+
+Thank you to all of our wonderful contributors!
+
+<a href="https://github.com/keras-team/keras-hub/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=keras-team/keras-hub" />
+</a>