keras-hub 0.25.1__py3-none-any.whl → 0.26.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +21 -0
- keras_hub/models/__init__.py +27 -0
- keras_hub/src/layers/modeling/non_max_supression.py +5 -2
- keras_hub/src/layers/modeling/reversible_embedding.py +2 -275
- keras_hub/src/layers/modeling/token_and_position_embedding.py +6 -6
- keras_hub/src/layers/modeling/transformer_layer_utils.py +9 -9
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +3 -1
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +3 -1
- keras_hub/src/models/albert/albert_backbone.py +1 -3
- keras_hub/src/models/backbone.py +3 -0
- keras_hub/src/models/bart/bart_backbone.py +1 -3
- keras_hub/src/models/bert/bert_backbone.py +2 -4
- keras_hub/src/models/bloom/bloom_backbone.py +1 -3
- keras_hub/src/models/causal_lm.py +2 -2
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -3
- keras_hub/src/models/edrec/edrec_backbone.py +147 -0
- keras_hub/src/models/edrec/edrec_layers.py +434 -0
- keras_hub/src/models/edrec/edrec_seq2seq_lm.py +273 -0
- keras_hub/src/models/electra/electra_backbone.py +1 -3
- keras_hub/src/models/f_net/f_net_backbone.py +1 -3
- keras_hub/src/models/falcon/falcon_backbone.py +1 -3
- keras_hub/src/models/flux/flux_layers.py +3 -3
- keras_hub/src/models/flux/flux_maths.py +29 -15
- keras_hub/src/models/gemma/gemma_backbone.py +1 -3
- keras_hub/src/models/gemma/gemma_causal_lm.py +1 -1
- keras_hub/src/models/gemma3/gemma3_attention.py +1 -1
- keras_hub/src/models/gemma3/gemma3_backbone.py +70 -8
- keras_hub/src/models/gemma3/gemma3_causal_lm.py +16 -1
- keras_hub/src/models/gemma3/gemma3_decoder_block.py +1 -1
- keras_hub/src/models/gemma3/{gemma3_interleave_embeddings.py → gemma3_layers.py} +101 -0
- keras_hub/src/models/gemma3/gemma3_presets.py +67 -7
- keras_hub/src/models/gemma3/gemma3_vision_encoder.py +1 -1
- keras_hub/src/models/gpt2/gpt2_backbone.py +1 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +1 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -3
- keras_hub/src/models/gpt_oss/gpt_oss_backbone.py +1 -3
- keras_hub/src/models/llama/llama_backbone.py +1 -3
- keras_hub/src/models/masked_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +1 -3
- keras_hub/src/models/mixtral/mixtral_backbone.py +1 -3
- keras_hub/src/models/moonshine/moonshine_backbone.py +1 -3
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +1 -3
- keras_hub/src/models/parseq/parseq_tokenizer.py +3 -1
- keras_hub/src/models/phi3/phi3_backbone.py +1 -3
- keras_hub/src/models/qwen/qwen_backbone.py +1 -3
- keras_hub/src/models/qwen/qwen_presets.py +209 -0
- keras_hub/src/models/qwen3/qwen3_backbone.py +1 -3
- keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +1 -3
- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +15 -0
- keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +1 -3
- keras_hub/src/models/roformer_v2/roformer_v2_backbone.py +1 -3
- keras_hub/src/models/rqvae/__init__.py +5 -0
- keras_hub/src/models/rqvae/rqvae_backbone.py +167 -0
- keras_hub/src/models/rqvae/rqvae_layers.py +335 -0
- keras_hub/src/models/rwkv7/__init__.py +5 -0
- keras_hub/src/models/rwkv7/rwkv7_backbone.py +180 -0
- keras_hub/src/models/rwkv7/rwkv7_causal_lm.py +259 -0
- keras_hub/src/models/rwkv7/rwkv7_causal_lm_preprocessor.py +214 -0
- keras_hub/src/models/rwkv7/rwkv7_layer.py +724 -0
- keras_hub/src/models/rwkv7/rwkv7_presets.py +26 -0
- keras_hub/src/models/rwkv7/rwkv7_tokenizer.py +495 -0
- keras_hub/src/models/sam/sam_backbone.py +5 -1
- keras_hub/src/models/sam/sam_prompt_encoder.py +1 -1
- keras_hub/src/models/sam3/__init__.py +7 -0
- keras_hub/src/models/sam3/roi_align.py +222 -0
- keras_hub/src/models/sam3/sam3_detr_decoder.py +641 -0
- keras_hub/src/models/sam3/sam3_detr_encoder.py +293 -0
- keras_hub/src/models/sam3/sam3_dot_product_scoring.py +120 -0
- keras_hub/src/models/sam3/sam3_geometry_encoder.py +517 -0
- keras_hub/src/models/sam3/sam3_image_converter.py +10 -0
- keras_hub/src/models/sam3/sam3_layers.py +814 -0
- keras_hub/src/models/sam3/sam3_mask_decoder.py +374 -0
- keras_hub/src/models/sam3/sam3_pc_backbone.py +306 -0
- keras_hub/src/models/sam3/sam3_pc_image_segmenter.py +282 -0
- keras_hub/src/models/sam3/sam3_pc_image_segmenter_preprocessor.py +336 -0
- keras_hub/src/models/sam3/sam3_presets.py +16 -0
- keras_hub/src/models/sam3/sam3_text_encoder.py +212 -0
- keras_hub/src/models/sam3/sam3_tokenizer.py +65 -0
- keras_hub/src/models/sam3/sam3_utils.py +134 -0
- keras_hub/src/models/sam3/sam3_vision_encoder.py +738 -0
- keras_hub/src/models/segformer/segformer_backbone.py +6 -6
- keras_hub/src/models/siglip/siglip_layers.py +1 -3
- keras_hub/src/models/smollm3/smollm3_backbone.py +1 -3
- keras_hub/src/models/stable_diffusion_3/t5_encoder.py +1 -3
- keras_hub/src/models/t5/t5_backbone.py +1 -3
- keras_hub/src/models/t5gemma/t5gemma_backbone.py +1 -3
- keras_hub/src/models/task.py +1 -1
- keras_hub/src/tests/test_case.py +394 -3
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +33 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +3 -1
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +15 -1
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +3 -1
- keras_hub/src/tokenizers/word_piece_tokenizer.py +15 -1
- keras_hub/src/utils/preset_utils.py +1 -1
- keras_hub/src/utils/tensor_utils.py +12 -0
- keras_hub/src/utils/transformers/convert_gemma3.py +68 -22
- keras_hub/src/utils/transformers/convert_qwen3_moe.py +4 -1
- keras_hub/src/utils/transformers/convert_sam3.py +472 -0
- keras_hub/src/utils/transformers/export/gemma3.py +196 -0
- keras_hub/src/utils/transformers/export/hf_exporter.py +86 -25
- keras_hub/src/utils/transformers/export/qwen.py +136 -0
- keras_hub/src/utils/transformers/preset_loader.py +15 -1
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +6 -0
- {keras_hub-0.25.1.dist-info → keras_hub-0.26.0.dev0.dist-info}/METADATA +6 -13
- {keras_hub-0.25.1.dist-info → keras_hub-0.26.0.dev0.dist-info}/RECORD +108 -76
- {keras_hub-0.25.1.dist-info → keras_hub-0.26.0.dev0.dist-info}/WHEEL +1 -1
- keras_hub/src/models/gemma3/rms_normalization.py +0 -26
- {keras_hub-0.25.1.dist-info → keras_hub-0.26.0.dev0.dist-info}/top_level.txt +0 -0
keras_hub/src/utils/transformers/export/hf_exporter.py
CHANGED
@@ -5,26 +5,45 @@ import warnings
 
 import keras
 
+# --- Gemma Utils ---
 from keras_hub.src.utils.transformers.export.gemma import get_gemma_config
 from keras_hub.src.utils.transformers.export.gemma import (
     get_gemma_tokenizer_config,
 )
 from keras_hub.src.utils.transformers.export.gemma import get_gemma_weights_map
 
+# --- Gemma 3 Utils ---
+from keras_hub.src.utils.transformers.export.gemma3 import get_gemma3_config
+from keras_hub.src.utils.transformers.export.gemma3 import (
+    get_gemma3_tokenizer_config,
+)
+from keras_hub.src.utils.transformers.export.gemma3 import (
+    get_gemma3_weights_map,
+)
+
+# --- Qwen Utils ---
+from keras_hub.src.utils.transformers.export.qwen import get_qwen_config
+from keras_hub.src.utils.transformers.export.qwen import (
+    get_qwen_tokenizer_config,
+)
+from keras_hub.src.utils.transformers.export.qwen import get_qwen_weights_map
+
 MODEL_CONFIGS = {
     "GemmaBackbone": get_gemma_config,
-
+    "Gemma3Backbone": get_gemma3_config,
+    "QwenBackbone": get_qwen_config,
 }
 
 MODEL_EXPORTERS = {
     "GemmaBackbone": get_gemma_weights_map,
-
+    "Gemma3Backbone": get_gemma3_weights_map,
+    "QwenBackbone": get_qwen_weights_map,
 }
 
 MODEL_TOKENIZER_CONFIGS = {
     "GemmaTokenizer": get_gemma_tokenizer_config,
-
-
+    "Gemma3Tokenizer": get_gemma3_tokenizer_config,
+    "QwenTokenizer": get_qwen_tokenizer_config,
 }
 
 
@@ -54,23 +73,55 @@ def export_backbone(backbone, path, include_lm_head=False):
     weights_dict = get_weights_fn(backbone, include_lm_head=include_lm_head)
     if not weights_dict:
         raise ValueError("No weights to save.")
+
     # Save config
     os.makedirs(path, exist_ok=True)
     config_path = os.path.join(path, "config.json")
+
+    config_to_save = hf_config
+    if hasattr(hf_config, "to_dict"):
+        config_to_save = hf_config.to_dict()
+
     with open(config_path, "w") as f:
-        json.dump(
+        json.dump(config_to_save, f, indent=2)
+
     # Save weights based on backend
     weights_path = os.path.join(path, "model.safetensors")
    if backend == "torch":
+        # Lazy import to prevent crash on TF-only environments
+        import torch
         from safetensors.torch import save_file
 
-
-
-
-
-
-
-
+        weights_dict_torch = {}
+        for k, v in weights_dict.items():
+            tensor = v.value if hasattr(v, "value") else v
+
+            if isinstance(tensor, torch.Tensor):
+                t = tensor.detach().to("cpu")
+            elif hasattr(tensor, "numpy"):
+                t = torch.tensor(tensor.numpy())
+            elif hasattr(tensor, "__array__"):
+                t = torch.tensor(tensor)
+            else:
+                t = tensor
+
+            if hasattr(t, "contiguous"):
+                t = t.contiguous()
+
+            weights_dict_torch[k] = t
+
+        # Handle Tied Weights
+        if (
+            "lm_head.weight" in weights_dict_torch
+            and "model.embed_tokens.weight" in weights_dict_torch
+        ):
+            wte = weights_dict_torch["model.embed_tokens.weight"]
+            lm = weights_dict_torch["lm_head.weight"]
+            if wte.data_ptr() == lm.data_ptr():
+                weights_dict_torch["lm_head.weight"] = lm.clone().contiguous()
+
+        save_file(weights_dict_torch, weights_path, metadata={"format": "pt"})
+
     elif backend == "tensorflow":
         from safetensors.tensorflow import save_file
 
@@ -91,31 +142,41 @@ def export_tokenizer(tokenizer, path):
         path: str. Path to save the exported tokenizer.
     """
     os.makedirs(path, exist_ok=True)
+
     # Save tokenizer assets
     tokenizer.save_assets(path)
+
     # Export tokenizer config
     tokenizer_type = tokenizer.__class__.__name__
     if tokenizer_type not in MODEL_TOKENIZER_CONFIGS:
         raise ValueError(
-            "Export to
+            f"Export to Transformer format not implemented for {tokenizer_type}"
         )
     get_tokenizer_config_fn = MODEL_TOKENIZER_CONFIGS[tokenizer_type]
     tokenizer_config = get_tokenizer_config_fn(tokenizer)
     tokenizer_config_path = os.path.join(path, "tokenizer_config.json")
     with open(tokenizer_config_path, "w") as f:
         json.dump(tokenizer_config, f, indent=4)
-
-
-
-
-
-
-
-
-
-
-
-
+
+    # Rename files to match Hugging Face expectations
+
+    # 1. SentencePiece Models (Gemma / Gemma 3)
+    if tokenizer_type in ["GemmaTokenizer", "Gemma3Tokenizer"]:
+        vocab_spm_path = os.path.join(path, "vocabulary.spm")
+        tokenizer_model_path = os.path.join(path, "tokenizer.model")
+        if os.path.exists(vocab_spm_path):
+            shutil.move(vocab_spm_path, tokenizer_model_path)
+        else:
+            warnings.warn(f"{vocab_spm_path} not found.")
+
+    # 2. BPE Models (Qwen)
+    elif tokenizer_type == "QwenTokenizer":
+        vocab_json_path = os.path.join(path, "vocabulary.json")
+        vocab_hf_path = os.path.join(path, "vocab.json")
+        if os.path.exists(vocab_json_path):
+            shutil.move(vocab_json_path, vocab_hf_path)
+        else:
+            warnings.warn(f"{vocab_json_path} not found.")
 
 
 def export_to_safetensors(keras_model, path):
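The tied-weights branch in the exporter above exists because `safetensors` refuses to serialize two tensors that share storage, which is exactly what tied input/output embeddings look like under the torch backend. A minimal standalone sketch of that failure mode and the clone-before-save workaround (the tensor shapes and output file name are arbitrary, not taken from this diff):

```python
# Toy reproduction of the shared-storage case the exporter guards against.
# safetensors.torch.save_file rejects tensors that share memory, so a tied
# lm_head must be cloned into its own contiguous buffer before saving.
import torch
from safetensors.torch import save_file

wte = torch.randn(8, 4)
tensors = {"model.embed_tokens.weight": wte, "lm_head.weight": wte}

if tensors["lm_head.weight"].data_ptr() == wte.data_ptr():
    tensors["lm_head.weight"] = wte.clone().contiguous()

save_file(tensors, "tied_demo.safetensors", metadata={"format": "pt"})
```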
keras_hub/src/utils/transformers/export/qwen.py
ADDED
@@ -0,0 +1,136 @@
+import keras.ops as ops
+
+
+def get_qwen_config(backbone):
+    """Convert Keras Qwen config to Hugging Face Qwen2Config."""
+    return {
+        # Core architectural dimensions
+        "vocab_size": backbone.vocabulary_size,
+        "hidden_size": backbone.hidden_dim,
+        "num_hidden_layers": backbone.num_layers,
+        "num_attention_heads": backbone.num_query_heads,
+        "num_key_value_heads": backbone.num_key_value_heads,
+        "intermediate_size": backbone.intermediate_dim,
+        # Activation and regularization
+        "hidden_act": "silu",
+        "attention_dropout": backbone.dropout,
+        # Numerical stability and initialization
+        "rms_norm_eps": backbone.layer_norm_epsilon,
+        "initializer_range": 0.02,
+        # RoPE settings
+        "rope_theta": backbone.rope_max_wavelength,
+        # Model behavior
+        "use_cache": True,
+        "tie_word_embeddings": backbone.tie_word_embeddings,
+        "model_type": "qwen2",
+    }
+
+
+def get_qwen_weights_map(backbone, include_lm_head=False):
+    """Create a weights map for a given Qwen model."""
+    weights_map = {}
+
+    # 1. Embeddings
+    weights_map["model.embed_tokens.weight"] = backbone.get_layer(
+        "token_embedding"
+    ).embeddings
+
+    for i in range(backbone.num_layers):
+        # Access the decoder layer
+        decoder_layer = backbone.get_layer(f"transformer_layer_{i}")
+
+        # --- Normalization ---
+        # Input Norm (Pre-Attention)
+        weights_map[f"model.layers.{i}.input_layernorm.weight"] = (
+            decoder_layer._self_attention_layernorm.scale
+        )
+
+        # Post Attention Norm (Pre-MLP)
+        weights_map[f"model.layers.{i}.post_attention_layernorm.weight"] = (
+            decoder_layer._feedforward_layernorm.scale
+        )
+
+        # --- Attention ---
+        attn_layer = decoder_layer._self_attention_layer
+
+        # Query
+        q_kernel = attn_layer._query_dense.kernel
+        q_kernel = ops.reshape(q_kernel, (backbone.hidden_dim, -1))
+        weights_map[f"model.layers.{i}.self_attn.q_proj.weight"] = (
+            ops.transpose(q_kernel)
+        )
+        weights_map[f"model.layers.{i}.self_attn.q_proj.bias"] = ops.reshape(
+            attn_layer._query_dense.bias, (-1,)
+        )
+
+        # Key
+        k_kernel = attn_layer._key_dense.kernel
+        k_kernel = ops.reshape(k_kernel, (backbone.hidden_dim, -1))
+        weights_map[f"model.layers.{i}.self_attn.k_proj.weight"] = (
+            ops.transpose(k_kernel)
+        )
+        weights_map[f"model.layers.{i}.self_attn.k_proj.bias"] = ops.reshape(
+            attn_layer._key_dense.bias, (-1,)
+        )
+
+        # Value
+        v_kernel = attn_layer._value_dense.kernel
+        v_kernel = ops.reshape(v_kernel, (backbone.hidden_dim, -1))
+        weights_map[f"model.layers.{i}.self_attn.v_proj.weight"] = (
+            ops.transpose(v_kernel)
+        )
+        weights_map[f"model.layers.{i}.self_attn.v_proj.bias"] = ops.reshape(
+            attn_layer._value_dense.bias, (-1,)
+        )
+
+        # Output
+        o_kernel = attn_layer._output_dense.kernel
+        o_kernel = ops.reshape(o_kernel, (-1, backbone.hidden_dim))
+        weights_map[f"model.layers.{i}.self_attn.o_proj.weight"] = (
+            ops.transpose(o_kernel)
+        )
+
+        # --- MLP (SwiGLU) ---
+        gate_kernel = decoder_layer._feedforward_gate_dense.kernel
+        weights_map[f"model.layers.{i}.mlp.gate_proj.weight"] = ops.transpose(
+            gate_kernel
+        )
+
+        up_kernel = decoder_layer._feedforward_intermediate_dense.kernel
+        weights_map[f"model.layers.{i}.mlp.up_proj.weight"] = ops.transpose(
+            up_kernel
+        )
+
+        down_kernel = decoder_layer._feedforward_output_dense.kernel
+        weights_map[f"model.layers.{i}.mlp.down_proj.weight"] = ops.transpose(
+            down_kernel
+        )
+
+    # Final Norm
+    weights_map["model.norm.weight"] = backbone.get_layer(
+        "sequence_output_layernorm"
+    ).scale
+
+    # LM Head
+    if include_lm_head:
+        if backbone.tie_word_embeddings:
+            weights_map["lm_head.weight"] = weights_map[
+                "model.embed_tokens.weight"
+            ]
+        else:
+            lm_head_w = backbone.get_layer("token_embedding").reverse_embeddings
+            weights_map["lm_head.weight"] = ops.transpose(lm_head_w)
+
+    return weights_map
+
+
+def get_qwen_tokenizer_config(tokenizer):
+    """Convert Keras Qwen tokenizer config to Hugging Face."""
+    return {
+        "tokenizer_class": "Qwen2Tokenizer",
+        "bos_token": None,
+        "eos_token": "<|endoftext|>",
+        "pad_token": "<|endoftext|>",
+        "unk_token": None,
+        "model_max_length": 32768,
+    }
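As a rough usage sketch, the new mapping lets a Qwen backbone be written out in Hugging Face format through the exporter above. The constructor arguments below are a toy configuration and are assumed to mirror the attributes that `get_qwen_config` reads (`vocabulary_size`, `num_layers`, `num_query_heads`, `num_key_value_heads`, `hidden_dim`, `intermediate_dim`); they and the output path are not taken from this diff.

```python
# Hedged sketch: export a tiny random-weight QwenBackbone with the new mapping.
# Assumes a backend with a safetensors writer (torch or tensorflow) is active.
import keras_hub
from keras_hub.src.utils.transformers.export.hf_exporter import export_backbone

backbone = keras_hub.models.QwenBackbone(
    vocabulary_size=1000,
    num_layers=2,
    num_query_heads=4,
    num_key_value_heads=2,
    hidden_dim=64,
    intermediate_dim=128,
)
export_backbone(backbone, "./qwen_hf_export", include_lm_head=True)
# Expected artifacts: config.json and model.safetensors under ./qwen_hf_export.
```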
keras_hub/src/utils/transformers/preset_loader.py
CHANGED
@@ -1,5 +1,7 @@
 """Convert huggingface models to KerasHub."""
 
+import inspect
+
 from keras_hub.src.models.image_classifier import ImageClassifier
 from keras_hub.src.utils.preset_utils import PresetLoader
 from keras_hub.src.utils.preset_utils import jax_memory_cleanup
@@ -23,6 +25,7 @@ from keras_hub.src.utils.transformers import convert_qwen
 from keras_hub.src.utils.transformers import convert_qwen3
 from keras_hub.src.utils.transformers import convert_qwen3_moe
 from keras_hub.src.utils.transformers import convert_qwen_moe
+from keras_hub.src.utils.transformers import convert_sam3
 from keras_hub.src.utils.transformers import convert_smollm3
 from keras_hub.src.utils.transformers import convert_t5gemma
 from keras_hub.src.utils.transformers import convert_vit
@@ -76,6 +79,8 @@ class TransformersPresetLoader(PresetLoader):
             self.converter = convert_qwen3_moe
         elif model_type == "qwen3":
             self.converter = convert_qwen3
+        elif model_type == "sam3_video":
+            self.converter = convert_sam3
         elif model_type == "smollm3":
             self.converter = convert_smollm3
         elif model_type == "t5gemma":
@@ -90,7 +95,16 @@ class TransformersPresetLoader(PresetLoader):
         return self.converter.backbone_cls
 
     def load_backbone(self, cls, load_weights, **kwargs):
-
+        convert_backbone_config_param_len = len(
+            inspect.signature(self.converter.convert_backbone_config).parameters
+        )
+        if convert_backbone_config_param_len != 1:
+            backbone_kwargs, kwargs = self.get_backbone_kwargs(**kwargs)
+            keras_config = self.converter.convert_backbone_config(
+                self.config, cls, **backbone_kwargs
+            )
+        else:
+            keras_config = self.converter.convert_backbone_config(self.config)
         backbone = cls(**{**keras_config, **kwargs})
         if load_weights:
             jax_memory_cleanup(backbone)
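The `inspect.signature` check above dispatches on the converter's arity: converters whose `convert_backbone_config` accepts only the Hugging Face config dict keep the old single-argument call, while converters with a richer signature also receive the backbone class and extra backbone kwargs. A small standalone sketch of that rule, using a hypothetical stand-in converter rather than the real keras-hub ones:

```python
# Standalone illustration of the arity-based dispatch; convert_backbone_config
# here is a hypothetical stand-in, not an actual keras-hub converter.
import inspect


def convert_backbone_config(transformers_config):
    return {"vocabulary_size": transformers_config["vocab_size"]}


n_params = len(inspect.signature(convert_backbone_config).parameters)
if n_params == 1:
    # Legacy path: the converter only needs the HF config dict.
    keras_config = convert_backbone_config({"vocab_size": 32000})
else:
    # Newer path: the loader would call it with (config, cls, **backbone_kwargs).
    keras_config = None
print(keras_config)
```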
keras_hub/src/version.py
CHANGED
keras_hub/tokenizers/__init__.py
CHANGED
@@ -93,6 +93,12 @@ from keras_hub.src.models.roberta.roberta_tokenizer import (
 from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import (
     RoformerV2Tokenizer as RoformerV2Tokenizer,
 )
+from keras_hub.src.models.rwkv7.rwkv7_tokenizer import (
+    RWKVTokenizer as RWKVTokenizer,
+)
+from keras_hub.src.models.sam3.sam3_tokenizer import (
+    SAM3Tokenizer as SAM3Tokenizer,
+)
 from keras_hub.src.models.siglip.siglip_tokenizer import (
     SigLIPTokenizer as SigLIPTokenizer,
 )
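With these re-exports in place, the new tokenizers become importable from the public `keras_hub.tokenizers` namespace. A minimal smoke test, which assumes this 0.26.0.dev0 build is installed:

```python
# Check the new public re-exports resolve (requires keras-hub 0.26.0.dev0).
from keras_hub.tokenizers import RWKVTokenizer, SAM3Tokenizer

print(RWKVTokenizer.__module__, SAM3Tokenizer.__module__)
```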
{keras_hub-0.25.1.dist-info → keras_hub-0.26.0.dev0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keras-hub
-Version: 0.25.1
+Version: 0.26.0.dev0
 Summary: Pretrained models for Keras.
 Author-email: Keras team <keras-users@googlegroups.com>
 License-Expression: Apache-2.0
@@ -8,7 +8,6 @@ Project-URL: Home, https://keras.io/keras_hub/
 Project-URL: Repository, https://github.com/keras-team/keras/keras_hub
 Classifier: Development Status :: 3 - Alpha
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3 :: Only
@@ -18,9 +17,9 @@ Classifier: Operating System :: MacOS
 Classifier: Intended Audience :: Science/Research
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development
-Requires-Python: >=3.
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
-Requires-Dist: keras>=3.
+Requires-Dist: keras>=3.13
 Requires-Dist: absl-py
 Requires-Dist: numpy
 Requires-Dist: packaging
@@ -31,7 +30,8 @@ Requires-Dist: tensorflow-text; platform_system != "Windows"
 
 # KerasHub: Multi-framework Pretrained Models
 [](https://github.com/keras-team/keras-hub/actions?query=workflow%3ATests+branch%3Amaster)
-
+[](https://www.kaggle.com/organizations/keras/models)
 [](https://github.com/keras-team/keras-hub/issues)
 
 > [!IMPORTANT]
@@ -41,7 +41,7 @@ Requires-Dist: tensorflow-text; platform_system != "Windows"
 **KerasHub** is a pretrained modeling library that aims to be simple, flexible,
 and fast. The library provides [Keras 3](https://keras.io/keras_3/)
 implementations of popular model architectures, paired with a collection of
-pretrained checkpoints available on [Kaggle Models](https://kaggle.com/models
+pretrained checkpoints available on [Kaggle Models](https://www.kaggle.com/organizations/keras/models).
 Models can be used with text, image, and audio data for generation, classification,
 and many other built in tasks.
 
@@ -133,13 +133,6 @@ To install the latest KerasHub release with Keras 3, simply run:
 pip install --upgrade keras-hub
 ```
 
-Our text tokenizers are based on TensorFlow Text. Hence, if you are using any
-model which has language as a modality, you will have to run:
-
-```
-pip install --upgrade keras-hub[nlp]
-```
-
 To install the latest nightly changes for both KerasHub and Keras, you can use
 our nightly package.
 