keras-hub-nightly 0.21.0.dev202505050407__py3-none-any.whl → 0.21.0.dev202505070407__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/models/__init__.py +21 -0
- keras_hub/src/models/backbone.py +5 -2
- keras_hub/src/models/cspnet/cspnet_backbone.py +51 -26
- keras_hub/src/models/cspnet/cspnet_presets.py +38 -3
- keras_hub/src/models/mixtral/mixtral_attention.py +263 -0
- keras_hub/src/models/mixtral/mixtral_backbone.py +207 -0
- keras_hub/src/models/mixtral/mixtral_causal_lm.py +281 -0
- keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py +76 -0
- keras_hub/src/models/mixtral/mixtral_decoder.py +494 -0
- keras_hub/src/models/mixtral/mixtral_layer_norm.py +34 -0
- keras_hub/src/models/mixtral/mixtral_tokenizer.py +21 -0
- keras_hub/src/models/qwen/qwen_attention.py +3 -1
- keras_hub/src/models/qwen/qwen_presets.py +61 -0
- keras_hub/src/models/qwen_moe/__init__.py +0 -0
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py +377 -0
- keras_hub/src/models/qwen_moe/qwen_moe_backbone.py +373 -0
- keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py +350 -0
- keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py +17 -0
- keras_hub/src/models/qwen_moe/qwen_moe_decoder.py +625 -0
- keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py +32 -0
- keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py +46 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +0 -13
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -2
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +0 -18
- keras_hub/src/models/segformer/segformer_presets.py +12 -12
- keras_hub/src/models/task.py +5 -2
- keras_hub/src/utils/keras_utils.py +11 -0
- keras_hub/src/utils/preset_utils.py +69 -9
- keras_hub/src/utils/tensor_utils.py +27 -1
- keras_hub/src/utils/timm/convert_cspnet.py +94 -23
- keras_hub/src/utils/timm/preset_loader.py +6 -6
- keras_hub/src/utils/transformers/convert_mixtral.py +139 -0
- keras_hub/src/utils/transformers/convert_qwen_moe.py +253 -0
- keras_hub/src/utils/transformers/preset_loader.py +6 -0
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +6 -0
- {keras_hub_nightly-0.21.0.dev202505050407.dist-info → keras_hub_nightly-0.21.0.dev202505070407.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.21.0.dev202505050407.dist-info → keras_hub_nightly-0.21.0.dev202505070407.dist-info}/RECORD +40 -22
- {keras_hub_nightly-0.21.0.dev202505050407.dist-info → keras_hub_nightly-0.21.0.dev202505070407.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.21.0.dev202505050407.dist-info → keras_hub_nightly-0.21.0.dev202505070407.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from keras_hub.src.models.mixtral.mixtral_backbone import MixtralBackbone
|
4
|
+
from keras_hub.src.utils.preset_utils import get_file
|
5
|
+
|
6
|
+
backbone_cls = MixtralBackbone
|
7
|
+
|
8
|
+
|
9
|
+
def convert_backbone_config(transformers_config):
    """Translate a Hugging Face Mixtral config into KerasHub backbone kwargs.

    Args:
        transformers_config: Dict parsed from the Hugging Face `config.json`.

    Returns:
        Keyword arguments suitable for constructing a `MixtralBackbone`.

    Raises:
        KeyError: If any expected Hugging Face config key is missing.
    """
    # KerasHub argument name -> Hugging Face config key.
    key_map = {
        "vocabulary_size": "vocab_size",
        "num_layers": "num_hidden_layers",
        "num_query_heads": "num_attention_heads",
        "hidden_dim": "hidden_size",
        "intermediate_dim": "intermediate_size",
        "num_key_value_heads": "num_key_value_heads",
        "num_experts": "num_local_experts",
        "top_k": "num_experts_per_tok",
        "rope_max_wavelength": "rope_theta",
        "layer_norm_epsilon": "rms_norm_eps",
        "sliding_window": "sliding_window",
        "output_router_logits": "output_router_logits",
    }
    return {
        keras_arg: transformers_config[hf_key]
        for keras_arg, hf_key in key_map.items()
    }
|
24
|
+
|
25
|
+
|
26
|
+
def convert_weights(backbone, loader, transformers_config):
    """Port Hugging Face Mixtral checkpoint weights into a KerasHub backbone.

    Args:
        backbone: A `MixtralBackbone` whose variables are assigned in place.
        loader: A safetensor loader exposing `port_weight` and `get_tensor`.
        transformers_config: The Hugging Face config dict (unused here; kept
            for converter-interface consistency).

    Returns:
        The `backbone`, with all weights assigned.
    """

    def to_keras_layout(x, shape):
        # HF stores dense kernels as (out, in); Keras expects (in, out),
        # optionally reshaped into per-head dimensions.
        return np.reshape(np.transpose(x), shape)

    def swap_axes(hf_tensor, _):
        return np.transpose(hf_tensor, axes=(1, 0))

    # Token embedding and the (untied) output projection.
    token_embedding = backbone.get_layer("token_embedding")
    loader.port_weight(
        keras_variable=token_embedding.embeddings,
        hf_weight_key="model.embed_tokens.weight",
    )
    loader.port_weight(
        keras_variable=token_embedding.reverse_embeddings,
        hf_weight_key="lm_head.weight",
        hook_fn=swap_axes,
    )

    for layer_index in range(backbone.num_layers):
        block = backbone.get_layer(f"transformer_layer_{layer_index}")
        prefix = f"model.layers.{layer_index}"

        # Pre-attention RMS norm.
        loader.port_weight(
            keras_variable=block._self_attention_layernorm.scale,
            hf_weight_key=f"{prefix}.input_layernorm.weight",
        )

        # Attention projections (query, key, value, output).
        attention = block._self_attention_layer
        for hf_name, dense in (
            ("q_proj", attention.query_dense),
            ("k_proj", attention.key_dense),
            ("v_proj", attention.value_dense),
            ("o_proj", attention._output_dense),
        ):
            loader.port_weight(
                keras_variable=dense.kernel,
                hf_weight_key=f"{prefix}.self_attn.{hf_name}.weight",
                hook_fn=to_keras_layout,
            )

        # Sparse-MoE router gate.
        moe_block = block._sparse_moe_block
        loader.port_weight(
            keras_variable=moe_block._sparse_feedforward_gate_dense.kernel,
            hf_weight_key=f"{prefix}.block_sparse_moe.gate.weight",
            hook_fn=swap_axes,
        )

        # Per-expert kernels, batched along a leading expert axis:
        # w1 -> gate, w3 -> intermediate, w2 -> output.
        expert_prefix = f"{prefix}.block_sparse_moe.experts"
        gate_kernels = []
        intermediate_kernels = []
        output_kernels = []
        for expert_index in range(backbone.num_experts):
            for hf_name, bucket in (
                ("w1", gate_kernels),
                ("w3", intermediate_kernels),
                ("w2", output_kernels),
            ):
                hf_tensor = loader.get_tensor(
                    f"{expert_prefix}.{expert_index}.{hf_name}.weight"
                )
                bucket.append(np.transpose(hf_tensor, axes=(1, 0)))

        moe_block.expert_bank._expert_feedforward_gate_dense.assign(
            np.stack(gate_kernels, axis=0)
        )
        moe_block.expert_bank._expert_feedforward_intermediate_dense.assign(
            np.stack(intermediate_kernels, axis=0)
        )
        moe_block.expert_bank._expert_feedforward_output_dense.assign(
            np.stack(output_kernels, axis=0)
        )

        # Post-attention RMS norm.
        loader.port_weight(
            keras_variable=block._feedforward_layernorm.scale,
            hf_weight_key=f"{prefix}.post_attention_layernorm.weight",
        )

    # Final RMS norm before the LM head.
    loader.port_weight(
        keras_variable=backbone.get_layer("sequence_output_layernorm").scale,
        hf_weight_key="model.norm.weight",
    )

    return backbone
|
136
|
+
|
137
|
+
|
138
|
+
def convert_tokenizer(cls, preset, **kwargs):
    """Build a Mixtral tokenizer from a preset's SentencePiece proto.

    Args:
        cls: The tokenizer class to instantiate.
        preset: Preset identifier passed through to `get_file`.
        **kwargs: Extra keyword arguments forwarded to `cls`.

    Returns:
        An instance of `cls` backed by the preset's `tokenizer.model`.
    """
    proto_file = get_file(preset, "tokenizer.model")
    return cls(proto_file, **kwargs)
|
@@ -0,0 +1,253 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
from keras_hub.src.models.qwen_moe.qwen_moe_backbone import QwenMoeBackbone
|
4
|
+
from keras_hub.src.utils.preset_utils import load_json
|
5
|
+
|
6
|
+
backbone_cls = QwenMoeBackbone
|
7
|
+
|
8
|
+
|
9
|
+
def convert_backbone_config(transformers_config):
    """Translate a Hugging Face Qwen2-MoE config into KerasHub backbone kwargs.

    Args:
        transformers_config: Dict parsed from the Hugging Face `config.json`.

    Returns:
        Keyword arguments suitable for constructing a `QwenMoeBackbone`.

    Raises:
        KeyError: If any expected Hugging Face config key is missing.
    """
    # KerasHub argument name -> Hugging Face config key.
    key_map = {
        "vocabulary_size": "vocab_size",
        "hidden_dim": "hidden_size",
        "num_layers": "num_hidden_layers",
        "num_query_heads": "num_attention_heads",
        "num_key_value_heads": "num_key_value_heads",
        "intermediate_dim": "intermediate_size",
        "moe_intermediate_dim": "moe_intermediate_size",
        "shared_expert_intermediate_dim": "shared_expert_intermediate_size",
        "num_experts": "num_experts",
        "top_k": "num_experts_per_tok",
        "norm_top_k_prob": "norm_topk_prob",
        "decoder_sparse_step": "decoder_sparse_step",
        "layer_norm_epsilon": "rms_norm_eps",
        "rope_max_wavelength": "rope_theta",
        "use_sliding_window": "use_sliding_window",
        "sliding_window_size": "sliding_window",
        "output_router_logits": "output_router_logits",
        "router_aux_loss_coefficient": "router_aux_loss_coef",
    }
    return {
        keras_arg: transformers_config[hf_key]
        for keras_arg, hf_key in key_map.items()
    }
|
34
|
+
|
35
|
+
|
36
|
+
def convert_weights(backbone, loader, transformers_config):
    """Port Hugging Face Qwen2-MoE checkpoint weights into a KerasHub backbone.

    Args:
        backbone: A `QwenMoeBackbone` whose variables are assigned in place.
        loader: A safetensor loader exposing `port_weight` and `get_tensor`.
        transformers_config: The Hugging Face config dict (unused here; kept
            for converter-interface consistency).

    Returns:
        The `backbone`, with all weights assigned.
    """

    def to_keras_layout(x, shape):
        # HF stores dense kernels as (out, in); Keras expects (in, out),
        # optionally reshaped into per-head dimensions. For 1-D biases the
        # transpose is a no-op and the reshape matches head layout.
        return np.reshape(np.transpose(x), shape)

    def swap_axes(hf_tensor, _):
        return np.transpose(hf_tensor, axes=(1, 0))

    # Token embedding, plus the output projection when it is not tied.
    token_embedding = backbone.get_layer("token_embedding")
    loader.port_weight(
        keras_variable=token_embedding.embeddings,
        hf_weight_key="model.embed_tokens.weight",
    )
    if not backbone.tie_word_embeddings:
        loader.port_weight(
            keras_variable=token_embedding.reverse_embeddings,
            hf_weight_key="lm_head.weight",
            hook_fn=swap_axes,
        )

    for layer_index in range(backbone.num_layers):
        block = backbone.get_layer(f"transformer_layer_{layer_index}")
        prefix = f"model.layers.{layer_index}"

        # Pre-attention RMS norm.
        loader.port_weight(
            keras_variable=block._self_attention_layernorm.scale,
            hf_weight_key=f"{prefix}.input_layernorm.weight",
        )

        # Attention projections. Qwen uses biases on q/k/v but not on the
        # output projection.
        attention = block._self_attention_layer
        for hf_name, dense in (
            ("q_proj", attention.query_dense),
            ("k_proj", attention.key_dense),
            ("v_proj", attention.value_dense),
        ):
            loader.port_weight(
                keras_variable=dense.kernel,
                hf_weight_key=f"{prefix}.self_attn.{hf_name}.weight",
                hook_fn=to_keras_layout,
            )
            loader.port_weight(
                keras_variable=dense.bias,
                hf_weight_key=f"{prefix}.self_attn.{hf_name}.bias",
                hook_fn=to_keras_layout,
            )
        loader.port_weight(
            keras_variable=attention._output_dense.kernel,
            hf_weight_key=f"{prefix}.self_attn.o_proj.weight",
            hook_fn=to_keras_layout,
        )

        # A layer is sparse (MoE) unless it is listed as MLP-only or does
        # not fall on the decoder-sparse-step cadence.
        is_sparse = (
            layer_index not in backbone.mlp_only_layers
            and backbone.num_experts > 0
            and (layer_index + 1) % backbone.decoder_sparse_step == 0
        )
        if is_sparse:
            moe_block = block.mlp

            # Router gate.
            loader.port_weight(
                keras_variable=moe_block._sparse_feedforward_gate_dense.kernel,
                hf_weight_key=f"{prefix}.mlp.gate.weight",
                hook_fn=swap_axes,
            )

            # Per-expert kernels, batched along a leading expert axis.
            gate_up_kernels = []
            down_kernels = []
            for expert_index in range(backbone.num_experts):
                expert_prefix = f"{prefix}.mlp.experts.{expert_index}"
                gate_kernel = np.transpose(
                    loader.get_tensor(f"{expert_prefix}.gate_proj.weight"),
                    axes=(1, 0),
                )
                up_kernel = np.transpose(
                    loader.get_tensor(f"{expert_prefix}.up_proj.weight"),
                    axes=(1, 0),
                )
                # Fused (gate | up) kernel, concatenated on the last axis.
                gate_up_kernels.append(
                    np.concatenate([gate_kernel, up_kernel], axis=-1)
                )
                down_kernels.append(
                    np.transpose(
                        loader.get_tensor(f"{expert_prefix}.down_proj.weight"),
                        axes=(1, 0),
                    )
                )

            # (num_experts, hidden_dim, 2 * moe_intermediate_dim)
            moe_block.expert_bank._expert_feedforward_gate_dense.assign(
                np.stack(gate_up_kernels, axis=0)
            )
            # (num_experts, moe_intermediate_dim, hidden_dim)
            moe_block.expert_bank._expert_feedforward_output_dense.assign(
                np.stack(down_kernels, axis=0)
            )

            # Shared expert MLP.
            shared = moe_block.shared_expert_dense
            for keras_kernel, hf_name in (
                (shared._feedforward_intermediate_dense.kernel, "up_proj"),
                (shared._feedforward_output_dense.kernel, "down_proj"),
                (shared._feedforward_gate_dense.kernel, "gate_proj"),
            ):
                loader.port_weight(
                    keras_variable=keras_kernel,
                    hf_weight_key=(
                        f"{prefix}.mlp.shared_expert.{hf_name}.weight"
                    ),
                    hook_fn=swap_axes,
                )

            # Scalar gate that blends the shared expert in.
            loader.port_weight(
                keras_variable=moe_block.shared_expert_gate_dense.kernel,
                hf_weight_key=f"{prefix}.mlp.shared_expert_gate.weight",
                hook_fn=swap_axes,
            )
        else:
            # Dense (non-MoE) MLP layer.
            for keras_kernel, hf_name in (
                (block._feedforward_intermediate_dense.kernel, "up_proj"),
                (block._feedforward_output_dense.kernel, "down_proj"),
                (block._feedforward_gate_dense.kernel, "gate_proj"),
            ):
                loader.port_weight(
                    keras_variable=keras_kernel,
                    hf_weight_key=f"{prefix}.mlp.{hf_name}.weight",
                    hook_fn=swap_axes,
                )

        # Post-attention RMS norm.
        loader.port_weight(
            keras_variable=block._feedforward_layernorm.scale,
            hf_weight_key=f"{prefix}.post_attention_layernorm.weight",
        )

    # Final RMS norm before the LM head.
    loader.port_weight(
        keras_variable=backbone.get_layer("sequence_output_layernorm").scale,
        hf_weight_key="model.norm.weight",
    )

    return backbone
|
233
|
+
|
234
|
+
|
235
|
+
def convert_tokenizer(cls, preset, **kwargs):
    """Build a Qwen-MoE tokenizer from a preset's Hugging Face `tokenizer.json`.

    Args:
        cls: The tokenizer class to instantiate.
        preset: Preset identifier passed through to `load_json`.
        **kwargs: Extra keyword arguments forwarded to `cls`.

    Returns:
        An instance of `cls` built from the preset's vocabulary and merges.
    """
    tokenizer_config = load_json(preset, "tokenizer.json")
    vocab = tokenizer_config["model"]["vocab"]
    merges = tokenizer_config["model"]["merges"]

    # Register every added token except the "reserved" placeholders, and
    # remember them so the tokenizer never splits them.
    special_tokens = set()
    for added in tokenizer_config["added_tokens"]:
        content = added["content"]
        if content.startswith("<|reserved_special_token_"):
            continue
        vocab[content] = added["id"]
        special_tokens.add(content)

    kwargs.update({"unsplittable_tokens": list(special_tokens)})

    return cls(vocabulary=vocab, merges=merges, **kwargs)
|
@@ -11,8 +11,10 @@ from keras_hub.src.utils.transformers import convert_gemma
|
|
11
11
|
from keras_hub.src.utils.transformers import convert_gpt2
|
12
12
|
from keras_hub.src.utils.transformers import convert_llama3
|
13
13
|
from keras_hub.src.utils.transformers import convert_mistral
|
14
|
+
from keras_hub.src.utils.transformers import convert_mixtral
|
14
15
|
from keras_hub.src.utils.transformers import convert_pali_gemma
|
15
16
|
from keras_hub.src.utils.transformers import convert_qwen
|
17
|
+
from keras_hub.src.utils.transformers import convert_qwen_moe
|
16
18
|
from keras_hub.src.utils.transformers import convert_vit
|
17
19
|
from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader
|
18
20
|
|
@@ -44,6 +46,10 @@ class TransformersPresetLoader(PresetLoader):
|
|
44
46
|
self.converter = convert_vit
|
45
47
|
elif model_type == "qwen2":
|
46
48
|
self.converter = convert_qwen
|
49
|
+
elif model_type == "mixtral":
|
50
|
+
self.converter = convert_mixtral
|
51
|
+
elif model_type == "qwen2_moe":
|
52
|
+
self.converter = convert_qwen_moe
|
47
53
|
else:
|
48
54
|
raise ValueError(
|
49
55
|
"KerasHub has no converter for huggingface/transformers models "
|
keras_hub/src/version.py
CHANGED
keras_hub/tokenizers/__init__.py
CHANGED
@@ -55,6 +55,9 @@ from keras_hub.src.models.llama3.llama3_tokenizer import (
|
|
55
55
|
from keras_hub.src.models.mistral.mistral_tokenizer import (
|
56
56
|
MistralTokenizer as MistralTokenizer,
|
57
57
|
)
|
58
|
+
from keras_hub.src.models.mixtral.mixtral_tokenizer import (
|
59
|
+
MixtralTokenizer as MixtralTokenizer,
|
60
|
+
)
|
58
61
|
from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer
|
59
62
|
from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
|
60
63
|
PaliGemmaTokenizer as PaliGemmaTokenizer,
|
@@ -68,6 +71,9 @@ from keras_hub.src.models.qwen.qwen_tokenizer import (
|
|
68
71
|
from keras_hub.src.models.qwen.qwen_tokenizer import (
|
69
72
|
QwenTokenizer as QwenTokenizer,
|
70
73
|
)
|
74
|
+
from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import (
|
75
|
+
QwenMoeTokenizer as QwenMoeTokenizer,
|
76
|
+
)
|
71
77
|
from keras_hub.src.models.roberta.roberta_tokenizer import (
|
72
78
|
RobertaTokenizer as RobertaTokenizer,
|
73
79
|
)
|
@@ -1,11 +1,11 @@
|
|
1
1
|
keras_hub/__init__.py,sha256=bJbUZkqwhZvTb1Tqx1fbkq6mzBYiEyq-Hin3oQIkhdE,558
|
2
2
|
keras_hub/layers/__init__.py,sha256=LhMUEcl3xJwqr0XphTgRZ5Ayz5SsBAKV19c0XwSzj1I,4952
|
3
3
|
keras_hub/metrics/__init__.py,sha256=KYalsMPBnfwim9BdGHFfJ5WxUKFXOQ1QoKIMT_0lwlM,439
|
4
|
-
keras_hub/models/__init__.py,sha256=
|
4
|
+
keras_hub/models/__init__.py,sha256=kFWNpjemQ8FLzDlFfMdAOOXJKtxuVHFxyZm7-1mH4Gc,25909
|
5
5
|
keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
|
6
6
|
keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
|
8
|
-
keras_hub/src/version.py,sha256=
|
8
|
+
keras_hub/src/version.py,sha256=aaWmSzLN-AkMZRmAoGmZ2N0SrrxAlh7bwjgP-M3xnCs,222
|
9
9
|
keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
|
@@ -41,7 +41,7 @@ keras_hub/src/metrics/rouge_base.py,sha256=Pt2DUznhTTeR-fX1nQ_wSbPtmuTgxQTvrGpu8
|
|
41
41
|
keras_hub/src/metrics/rouge_l.py,sha256=JlZhMBV6wS_6zMd57pkTc6yxHkEJT9fVQMlPZKekQzQ,2729
|
42
42
|
keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemupRc,3620
|
43
43
|
keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
|
-
keras_hub/src/models/backbone.py,sha256=
|
44
|
+
keras_hub/src/models/backbone.py,sha256=KS2x3HFWKhEYhroUFT3uZgSkeW_48zPGqUNvxCDDIQQ,11534
|
45
45
|
keras_hub/src/models/causal_lm.py,sha256=ReaF-i3SHsCkHh4c28jM72QjMQ8x7yiCwG39FRb-7KE,16786
|
46
46
|
keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
|
47
47
|
keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
|
@@ -58,7 +58,7 @@ keras_hub/src/models/object_detector_preprocessor.py,sha256=kOSVRNFAg-UjtrCEVBdH
|
|
58
58
|
keras_hub/src/models/preprocessor.py,sha256=kBlahgVST3L6vKeWDM4fXuDoXa6pwaJW2A5__L85wFU,8487
|
59
59
|
keras_hub/src/models/seq_2_seq_lm.py,sha256=w0gX-5YZjatfvAJmFAgSHyqS_BLqc8FF8DPLGK8mrgI,1864
|
60
60
|
keras_hub/src/models/seq_2_seq_lm_preprocessor.py,sha256=DJmm4VTt8AdLtq1k9YKl_VR31cKUHaYjfSbrk7-fJqA,9667
|
61
|
-
keras_hub/src/models/task.py,sha256=
|
61
|
+
keras_hub/src/models/task.py,sha256=e9zK2zHgeOkjNACcCmAf-lGuEGF_eRoP_lKlirdIXuk,14817
|
62
62
|
keras_hub/src/models/text_classifier.py,sha256=B6cTYDbDZW8vRvenXrLwgMMVIYMb7Pr14GvX8C_wclQ,4159
|
63
63
|
keras_hub/src/models/text_classifier_preprocessor.py,sha256=EoWp-GHnaLnAKTdAzDmC-soAV92ATF3QozdubdV2WXI,4722
|
64
64
|
keras_hub/src/models/text_to_image.py,sha256=NIy4S6Fh8MsbNiskAFhjmFXgRiiFqn_rOvpGOO6LlF0,13390
|
@@ -110,11 +110,11 @@ keras_hub/src/models/clip/clip_tokenizer.py,sha256=6gIm_LWRbCeBQUI9M2gA8-OXb4tXG
|
|
110
110
|
keras_hub/src/models/clip/clip_vision_embedding.py,sha256=6_qC7T1dqKd-39EreGmHZj-YfjOLEDDKjWnEKcKIyuY,3667
|
111
111
|
keras_hub/src/models/clip/clip_vision_encoder.py,sha256=q62MXySZN38uCsjqq8cttfBxD7P5abaKQV2i8_u4N6E,6385
|
112
112
|
keras_hub/src/models/cspnet/__init__.py,sha256=TOpvk2cfOVv1bPA1BOGZj0mhmhc6E98zZmW9e0PIvhk,257
|
113
|
-
keras_hub/src/models/cspnet/cspnet_backbone.py,sha256=
|
113
|
+
keras_hub/src/models/cspnet/cspnet_backbone.py,sha256=meHzxubG_9vHQHSelDfrROaQERkDiWkjTtk_gKaWsDc,42457
|
114
114
|
keras_hub/src/models/cspnet/cspnet_image_classifier.py,sha256=JqfBHIBTFxaLOyAWx6TdXs0aAOMbcCx1oo47RoQnytc,510
|
115
115
|
keras_hub/src/models/cspnet/cspnet_image_classifier_preprocessor.py,sha256=ACRnOhjslk2ZZhpPfJioW4um4RLYa-Suk59z9wa5vfo,543
|
116
116
|
keras_hub/src/models/cspnet/cspnet_image_converter.py,sha256=f-ICTY2T-RlCykU6qOHDxg0fY7ECfZ_xpSJzIVmbvpc,342
|
117
|
-
keras_hub/src/models/cspnet/cspnet_presets.py,sha256=
|
117
|
+
keras_hub/src/models/cspnet/cspnet_presets.py,sha256=n01_7DTvbmaA_qs2GWiNLkBXNrrEvigPXSGc2NDTot8,1870
|
118
118
|
keras_hub/src/models/deberta_v3/__init__.py,sha256=6E-QtAD1uvTBobrn5bUoyB1qtaCJU-t73TtbAEH6i9g,288
|
119
119
|
keras_hub/src/models/deberta_v3/deberta_v3_backbone.py,sha256=oXdV7naTiMowuU3GsXEUo5K0GXiKbPKxdo27o5fXWjc,7258
|
120
120
|
keras_hub/src/models/deberta_v3/deberta_v3_masked_lm.py,sha256=ADBktf1DdiP9T6LCaMhdFiZ_mUbBRKMekY5mGwAeJIo,4186
|
@@ -250,6 +250,13 @@ keras_hub/src/models/mit/mit_image_classifier_preprocessor.py,sha256=oNYs-pUK8Vn
|
|
250
250
|
keras_hub/src/models/mit/mit_image_converter.py,sha256=Mw7nV-OzyBveGuZUNFsPPKyq9jXJVW2_cVH024CNkXM,311
|
251
251
|
keras_hub/src/models/mit/mit_layers.py,sha256=HUJO5uhJ6jgwANpwbQdPlEVwLRVb3BZQ-Ftjg3B9XvY,9734
|
252
252
|
keras_hub/src/models/mit/mit_presets.py,sha256=ooLrh2OoGZKxnCGnhB6BynYJtVCXH7nDDFhgQRWt36U,4528
|
253
|
+
keras_hub/src/models/mixtral/mixtral_attention.py,sha256=rdUBjIFQZKBpyCXlXMDgmB8gLCk0ngnhdhNs_twFE_c,9089
|
254
|
+
keras_hub/src/models/mixtral/mixtral_backbone.py,sha256=vUAFXvqwVBgKxYbOsqIHzPN59bhaDrGWwOnBCzeUtt0,8034
|
255
|
+
keras_hub/src/models/mixtral/mixtral_causal_lm.py,sha256=JA1t6xTeaYX_fNo9ftRyvzdRDG3vndC-Rlwn5fnsbQo,12001
|
256
|
+
keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py,sha256=q2qXa9QAUWBvOWv9DeNvwsBNXSORJAbQFoQsWQ7e8V8,3079
|
257
|
+
keras_hub/src/models/mixtral/mixtral_decoder.py,sha256=CvOjhTxPnGQ_HNknZXRI6Cx1kpuHG99_TiOh-mNcsDw,18190
|
258
|
+
keras_hub/src/models/mixtral/mixtral_layer_norm.py,sha256=zfbDKZEb45FTwP0zQd7WPPp8tuiGoSNfS-DRYWkZyWw,1031
|
259
|
+
keras_hub/src/models/mixtral/mixtral_tokenizer.py,sha256=Kc233k879QMyX164X_CzWbqpnqEkKWNqa648guTGkBk,661
|
253
260
|
keras_hub/src/models/mobilenet/__init__.py,sha256=hxkNGGj_iAMu62iooUDEPA818sNOIgjG7pXMLEMOsAE,275
|
254
261
|
keras_hub/src/models/mobilenet/mobilenet_backbone.py,sha256=aZBSFeLUObYYoi3od9DI1KfgPCqh5GHTcAI8Y2ZHShA,29536
|
255
262
|
keras_hub/src/models/mobilenet/mobilenet_image_classifier.py,sha256=rgPVJeSRqyp3-Fgf5ERbg_97c4cSawRmAtoJpdBN8WA,2437
|
@@ -283,13 +290,22 @@ keras_hub/src/models/phi3/phi3_presets.py,sha256=sb2ce7Gq1OikFEf2KIYG69rFKHYKj8q
|
|
283
290
|
keras_hub/src/models/phi3/phi3_rotary_embedding.py,sha256=wqiRn8nETNcLc5Vsm_d_8s11Ro6ibWZbWvODdLqIOo4,5013
|
284
291
|
keras_hub/src/models/phi3/phi3_tokenizer.py,sha256=bOPH14wTVVHJHq8mgzXLjsgvKMNhfO8eayevAPpjYVA,1992
|
285
292
|
keras_hub/src/models/qwen/__init__.py,sha256=hskG3tZUY_AYZPp0WVzbCtw37AIYENyp3DOnqHmdRBw,65
|
286
|
-
keras_hub/src/models/qwen/qwen_attention.py,sha256=
|
293
|
+
keras_hub/src/models/qwen/qwen_attention.py,sha256=SrUYESCg27ksuDKZHKJ5Wmnkbr6WZdF7nHv0AHFfWR8,13014
|
287
294
|
keras_hub/src/models/qwen/qwen_backbone.py,sha256=i39_LoKu6hcYWV6KFh2OzUDaXjV7g1WLNGF2-JD_tqI,13015
|
288
295
|
keras_hub/src/models/qwen/qwen_causal_lm.py,sha256=_f-UHaKHp0ncxknpkpEJiW3jlng3E4CmddjQfz2QzJo,12249
|
289
296
|
keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py,sha256=Va-4TLJD3ycEnkS41rF3dVj4_6K0j-gxLTrREFRcyr0,609
|
290
297
|
keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSwOARG0ote-jAg,11771
|
291
298
|
keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
|
299
|
+
keras_hub/src/models/qwen/qwen_presets.py,sha256=_jRG7bB4yBGWteBLbK2elc1e9doRl8zdzQRZgxFvnfc,1988
|
292
300
|
keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
|
301
|
+
keras_hub/src/models/qwen_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
302
|
+
keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=mXc4uGkUSK3FHdJ5_77xiX7Gm0eO1GWTF40ei_68pvU,13472
|
303
|
+
keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
|
304
|
+
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
|
305
|
+
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=uKaXRrJs02vkVudjdehzJPp0B84tPMkxNHlp166kceE,589
|
306
|
+
keras_hub/src/models/qwen_moe/qwen_moe_decoder.py,sha256=kmUjLpYTbJQ3J_31qWhLOd0Dg2_9cl_JX_zM8ZMH1Qo,23130
|
307
|
+
keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py,sha256=DbkWJo7U0-cwdZwHPeAnFznYwtao6o0fjpoDJ9UWnpc,927
|
308
|
+
keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py,sha256=2c3X8jNGO0q0UL5NtUqSgHWLqhyJGi2ohNcTeOGhd84,1407
|
293
309
|
keras_hub/src/models/resnet/__init__.py,sha256=C5UqlQ6apm8WSp1bnrxB6Bi3BGaknxRQs-r3b2wpaGA,257
|
294
310
|
keras_hub/src/models/resnet/resnet_backbone.py,sha256=Q7nlqcTXZzjqd0e-DsjHC4ok58yOX7qxseotym3uZpM,31276
|
295
311
|
keras_hub/src/models/resnet/resnet_image_classifier.py,sha256=nf35EKDzvBkfhHsK-s6Ks0nbhvKO7HEOYZm94YckyWE,510
|
@@ -300,11 +316,11 @@ keras_hub/src/models/retinanet/__init__.py,sha256=veWIFvMN6151M69l7FvTcI-IIEe_8d
|
|
300
316
|
keras_hub/src/models/retinanet/feature_pyramid.py,sha256=hbdrj6X-D2SlwOp2h1WcBlTdSAlLmFK43X7OrkJRoMA,17614
|
301
317
|
keras_hub/src/models/retinanet/prediction_head.py,sha256=xWHt21-SS2t7vCmTONlR1lSbJXhml5jx68V8MGbGybg,7863
|
302
318
|
keras_hub/src/models/retinanet/retinanet_backbone.py,sha256=BJBPJLxpOCOU0Br7b4JsgCZBHQHLAhxLqo9BHNIsl1g,5659
|
303
|
-
keras_hub/src/models/retinanet/retinanet_image_converter.py,sha256=
|
319
|
+
keras_hub/src/models/retinanet/retinanet_image_converter.py,sha256=jnVAqQ3zem0JNk5iaIdrMGKyGv_ulAcePpM5t1lulWI,360
|
304
320
|
keras_hub/src/models/retinanet/retinanet_label_encoder.py,sha256=Vowhs4uOZAevmVg1a19efIPfvjxkckXwsJDTX3VPDxs,10967
|
305
321
|
keras_hub/src/models/retinanet/retinanet_object_detector.py,sha256=WJ3YLnnC4mcCLLoE7uUFA0cOSVuFgnx9Cr47If50Aig,15595
|
306
322
|
keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py,sha256=RnJkdqv4zYVcGx50sHoA7j9G1AKwEN-RNtyMQg-MMbo,568
|
307
|
-
keras_hub/src/models/retinanet/retinanet_presets.py,sha256=
|
323
|
+
keras_hub/src/models/retinanet/retinanet_presets.py,sha256=75_Gnxt84MBjTDd4xQVSsIa2sDQ-KnQ4_Hw9nZ90ljE,950
|
308
324
|
keras_hub/src/models/roberta/__init__.py,sha256=3ouSnKdLlMwoDDLVKD9cNtxam6f8XWgCyc0pwWJ0Zjo,263
|
309
325
|
keras_hub/src/models/roberta/roberta_backbone.py,sha256=q16dylXbgWshT-elCA08lS_b_IZNphsBrrXiv3eJksM,6339
|
310
326
|
keras_hub/src/models/roberta/roberta_masked_lm.py,sha256=j2dFANRFHd1MNFP_REchljGWOcpOjCpdSya-WGdRzPA,4176
|
@@ -337,8 +353,8 @@ keras_hub/src/models/segformer/__init__.py,sha256=ERgxA8tyeG2l4G6ywHisn6Oo0Iu7_9
|
|
337
353
|
keras_hub/src/models/segformer/segformer_backbone.py,sha256=T61WQ50T6IwSeiK1NfUKJu3eqbj_m5gz9cpUPtqMfcc,5666
|
338
354
|
keras_hub/src/models/segformer/segformer_image_converter.py,sha256=zePZ1cYZl-2TaEF82lj3y7kXjDao5Hgw8c7qfKI2Jd8,360
|
339
355
|
keras_hub/src/models/segformer/segformer_image_segmenter.py,sha256=JzX8oJASWdkw8wbm8cohjPnumIvBvj7GGEpbK7ex-6w,5926
|
340
|
-
keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py,sha256=
|
341
|
-
keras_hub/src/models/segformer/segformer_presets.py,sha256=
|
356
|
+
keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py,sha256=Jlsy41n7IymjvU3ENKQJtsQ4xkJEGRtxEm-defz2Nbc,568
|
357
|
+
keras_hub/src/models/segformer/segformer_presets.py,sha256=4fPkGTP_jjd3Qcd1KbWYZ7-ze0wdJskMwKG4GZ-UwEg,4793
|
342
358
|
keras_hub/src/models/siglip/__init__.py,sha256=uImQYl06pioLwla6c_tiF2PSJKHtq0aSxDPsynQbXcA,257
|
343
359
|
keras_hub/src/models/siglip/siglip_backbone.py,sha256=dXp7BU7mqKWthl70KFZ2AMILjNIu5A6itQFu6XD22Qs,8372
|
344
360
|
keras_hub/src/models/siglip/siglip_image_converter.py,sha256=yjYc0XOyL37WLlr-X6V4QXI1FCyiKngbHXrZK0hNB1U,342
|
@@ -435,23 +451,23 @@ keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0Z
|
|
435
451
|
keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
|
436
452
|
keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
|
437
453
|
keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
438
|
-
keras_hub/src/utils/keras_utils.py,sha256=
|
454
|
+
keras_hub/src/utils/keras_utils.py,sha256=2qrh4F-rqceVFSx0-cbsFBfWae5hBXFb_sEtPPcImf4,4628
|
439
455
|
keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
|
440
|
-
keras_hub/src/utils/preset_utils.py,sha256=
|
456
|
+
keras_hub/src/utils/preset_utils.py,sha256=fx0gNqOTdvW-ZdP0Y3ZaCGE7frYBhwi3lG_GO0swG4w,34602
|
441
457
|
keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
|
442
|
-
keras_hub/src/utils/tensor_utils.py,sha256=
|
458
|
+
keras_hub/src/utils/tensor_utils.py,sha256=vRbvvnFwA6FutJ7InC1w60HDTVNi87CniDGOLQ3hKPA,15855
|
443
459
|
keras_hub/src/utils/coco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
444
460
|
keras_hub/src/utils/coco/coco_utils.py,sha256=x_QnUUvZ92zoFzMJugiInHORc4NrMdWVBkpp8BAYF6s,2586
|
445
461
|
keras_hub/src/utils/imagenet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
446
462
|
keras_hub/src/utils/imagenet/imagenet_utils.py,sha256=07ilM5feeD7Ut6YSbVj99RXAZOQONSC1IeKa3I9U6UQ,40161
|
447
463
|
keras_hub/src/utils/timm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
448
|
-
keras_hub/src/utils/timm/convert_cspnet.py,sha256=
|
464
|
+
keras_hub/src/utils/timm/convert_cspnet.py,sha256=9p1IF0B4UPbDTruQQXR6mJEUdhvQvHx9E0SKNn_Lbw4,8047
|
449
465
|
keras_hub/src/utils/timm/convert_densenet.py,sha256=fu8HBIQis5o3ib2tyI2qnmYScVrVIQySok8vTfa1qJ8,3393
|
450
466
|
keras_hub/src/utils/timm/convert_efficientnet.py,sha256=SgEIlyyinS04qoQpEgh3WazHq544zNUCCpfmWh3EjSs,17100
|
451
467
|
keras_hub/src/utils/timm/convert_mobilenet.py,sha256=XTqHOK4nJwigKefsw7ktWJtOgRpEVMO9MtRhuP5qP_k,9219
|
452
468
|
keras_hub/src/utils/timm/convert_resnet.py,sha256=8JFkVtdpy5z9h83LJ97rD-a8FRejXPZvMNksNuStqjM,5834
|
453
469
|
keras_hub/src/utils/timm/convert_vgg.py,sha256=MT5jGnLrzenPpe66Af_Lp1IdR9KGtsSrcmn6_UPqHvQ,2419
|
454
|
-
keras_hub/src/utils/timm/preset_loader.py,sha256=
|
470
|
+
keras_hub/src/utils/timm/preset_loader.py,sha256=4hULdq2K2hgPYTZR71PGV4YNDHLG1zcoxF9TXpg6fGE,3905
|
455
471
|
keras_hub/src/utils/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
456
472
|
keras_hub/src/utils/transformers/convert_albert.py,sha256=VdKclZpCxtDWq3UbUUQZf4fR9DJK_JYZ73B4O_G9skg,7695
|
457
473
|
keras_hub/src/utils/transformers/convert_bart.py,sha256=Tk4h9Md9rwN5wjQbGIVrC7qzDpF8kI8qm-FKL8HlUok,14411
|
@@ -461,14 +477,16 @@ keras_hub/src/utils/transformers/convert_gemma.py,sha256=ElCgwBpSN5Q7rV5PJawTsoy
|
|
461
477
|
keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1watPVpIBF8ujS8pGbBOWc,5703
|
462
478
|
keras_hub/src/utils/transformers/convert_llama3.py,sha256=c5phNl-QayQ_BS0s-lenbu6oHxqfwDShKJoh9DluxUU,6146
|
463
479
|
keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
|
480
|
+
keras_hub/src/utils/transformers/convert_mixtral.py,sha256=_esUzVRYABR5pHHSALqUieSuAeBg4te1JnlGQENqECU,5509
|
464
481
|
keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
|
465
482
|
keras_hub/src/utils/transformers/convert_qwen.py,sha256=WUxMAEFVqRs7TRw7QU5TH3_ev4yf02R1xFVliMvTQqg,5886
|
483
|
+
keras_hub/src/utils/transformers/convert_qwen_moe.py,sha256=a7R28aln-PdAcNuKAXdrtzvslho2Co6GypChxLMKPpc,10618
|
466
484
|
keras_hub/src/utils/transformers/convert_vit.py,sha256=9SUZ9utNJhW_5cj3acMn9cRy47u2eIcDsrhmzj77o9k,5187
|
467
|
-
keras_hub/src/utils/transformers/preset_loader.py,sha256=
|
485
|
+
keras_hub/src/utils/transformers/preset_loader.py,sha256=1nfS5xVsl-JROGXJXltTqV1fQdcUlZbGGcbf-n79pXM,4225
|
468
486
|
keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
|
469
|
-
keras_hub/tokenizers/__init__.py,sha256=
|
487
|
+
keras_hub/tokenizers/__init__.py,sha256=4etC--bzhczJrRcvCmxZmOC9hJJcIVOUCgmqMLB3bp0,4051
|
470
488
|
keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
|
471
|
-
keras_hub_nightly-0.21.0.
|
472
|
-
keras_hub_nightly-0.21.0.
|
473
|
-
keras_hub_nightly-0.21.0.
|
474
|
-
keras_hub_nightly-0.21.0.
|
489
|
+
keras_hub_nightly-0.21.0.dev202505070407.dist-info/METADATA,sha256=qZV9GqHYenWgLpKBXdil_rhFnetmHPFV-OgVFjxPKTs,7393
|
490
|
+
keras_hub_nightly-0.21.0.dev202505070407.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
491
|
+
keras_hub_nightly-0.21.0.dev202505070407.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
|
492
|
+
keras_hub_nightly-0.21.0.dev202505070407.dist-info/RECORD,,
|
File without changes
|