keras-hub 0.22.1__py3-none-any.whl → 0.23.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. keras_hub/layers/__init__.py +12 -0
  2. keras_hub/models/__init__.py +90 -0
  3. keras_hub/src/layers/modeling/position_embedding.py +21 -6
  4. keras_hub/src/layers/modeling/reversible_embedding.py +8 -1
  5. keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
  6. keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
  7. keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
  8. keras_hub/src/models/backbone.py +28 -16
  9. keras_hub/src/models/causal_lm.py +37 -0
  10. keras_hub/src/models/causal_lm_preprocessor.py +14 -0
  11. keras_hub/src/models/clip/clip_presets.py +8 -8
  12. keras_hub/src/models/d_fine/__init__.py +5 -0
  13. keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
  14. keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
  15. keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
  16. keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
  17. keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
  18. keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
  19. keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
  20. keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
  21. keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
  22. keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
  23. keras_hub/src/models/d_fine/d_fine_presets.py +155 -0
  24. keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
  25. keras_hub/src/models/deberta_v3/disentangled_self_attention.py +7 -2
  26. keras_hub/src/models/depth_anything/__init__.py +9 -0
  27. keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
  28. keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
  29. keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
  30. keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
  31. keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
  32. keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
  33. keras_hub/src/models/depth_anything/depth_anything_presets.py +41 -0
  34. keras_hub/src/models/depth_anything/interpolate.py +62 -0
  35. keras_hub/src/models/depth_estimator.py +239 -0
  36. keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
  37. keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
  38. keras_hub/src/models/dinov2/dinov2_layers.py +13 -3
  39. keras_hub/src/models/gemma/gemma_backbone.py +0 -1
  40. keras_hub/src/models/gemma/gemma_presets.py +30 -0
  41. keras_hub/src/models/gemma3/gemma3_attention.py +48 -0
  42. keras_hub/src/models/gemma3/gemma3_backbone.py +4 -1
  43. keras_hub/src/models/gemma3/gemma3_decoder_block.py +12 -0
  44. keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
  45. keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
  46. keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
  47. keras_hub/src/models/image_to_image.py +5 -0
  48. keras_hub/src/models/inpaint.py +5 -0
  49. keras_hub/src/models/mobilenetv5/__init__.py +9 -0
  50. keras_hub/src/models/mobilenetv5/mobilenetv5_attention.py +699 -0
  51. keras_hub/src/models/mobilenetv5/mobilenetv5_backbone.py +396 -0
  52. keras_hub/src/models/mobilenetv5/mobilenetv5_blocks.py +890 -0
  53. keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py +436 -0
  54. keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py +157 -0
  55. keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py +16 -0
  56. keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py +10 -0
  57. keras_hub/src/models/mobilenetv5/mobilenetv5_layers.py +462 -0
  58. keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py +15 -0
  59. keras_hub/src/models/mobilenetv5/mobilenetv5_utils.py +146 -0
  60. keras_hub/src/models/parseq/__init__.py +5 -0
  61. keras_hub/src/models/parseq/parseq_backbone.py +134 -0
  62. keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
  63. keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
  64. keras_hub/src/models/parseq/parseq_decoder.py +418 -0
  65. keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
  66. keras_hub/src/models/parseq/parseq_presets.py +15 -0
  67. keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
  68. keras_hub/src/models/qwen3_moe/__init__.py +5 -0
  69. keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
  70. keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
  71. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
  72. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
  73. keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
  74. keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
  75. keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +30 -0
  76. keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
  77. keras_hub/src/models/sam/sam_prompt_encoder.py +3 -1
  78. keras_hub/src/models/smollm3/smollm3_backbone.py +211 -0
  79. keras_hub/src/models/smollm3/smollm3_causal_lm.py +310 -0
  80. keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py +84 -0
  81. keras_hub/src/models/smollm3/smollm3_layers.py +757 -0
  82. keras_hub/src/models/smollm3/smollm3_tokenizer.py +60 -0
  83. keras_hub/src/models/smollm3/smollm3_utils.py +56 -0
  84. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +3 -3
  85. keras_hub/src/models/t5gemma/__init__.py +5 -0
  86. keras_hub/src/models/t5gemma/t5gemma_attention.py +370 -0
  87. keras_hub/src/models/t5gemma/t5gemma_backbone.py +366 -0
  88. keras_hub/src/models/t5gemma/t5gemma_decoder.py +355 -0
  89. keras_hub/src/models/t5gemma/t5gemma_encoder.py +214 -0
  90. keras_hub/src/models/t5gemma/t5gemma_layers.py +118 -0
  91. keras_hub/src/models/t5gemma/t5gemma_presets.py +374 -0
  92. keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm.py +442 -0
  93. keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_preprocessor.py +216 -0
  94. keras_hub/src/models/t5gemma/t5gemma_tokenizer.py +84 -0
  95. keras_hub/src/models/text_to_image.py +5 -0
  96. keras_hub/src/samplers/beam_sampler.py +6 -6
  97. keras_hub/src/samplers/sampler.py +8 -6
  98. keras_hub/src/tests/test_case.py +40 -3
  99. keras_hub/src/tokenizers/tokenizer.py +15 -0
  100. keras_hub/src/utils/openvino_utils.py +141 -0
  101. keras_hub/src/utils/preset_utils.py +58 -2
  102. keras_hub/src/utils/tensor_utils.py +23 -1
  103. keras_hub/src/utils/timm/convert_mobilenetv5.py +321 -0
  104. keras_hub/src/utils/timm/preset_loader.py +8 -4
  105. keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
  106. keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
  107. keras_hub/src/utils/transformers/convert_smollm3.py +139 -0
  108. keras_hub/src/utils/transformers/convert_t5gemma.py +229 -0
  109. keras_hub/src/utils/transformers/convert_vit.py +4 -1
  110. keras_hub/src/utils/transformers/export/gemma.py +49 -4
  111. keras_hub/src/utils/transformers/export/hf_exporter.py +71 -25
  112. keras_hub/src/utils/transformers/preset_loader.py +9 -0
  113. keras_hub/src/version.py +1 -1
  114. keras_hub/tokenizers/__init__.py +15 -0
  115. {keras_hub-0.22.1.dist-info → keras_hub-0.23.0.dev0.dist-info}/METADATA +1 -1
  116. {keras_hub-0.22.1.dist-info → keras_hub-0.23.0.dev0.dist-info}/RECORD +118 -45
  117. {keras_hub-0.22.1.dist-info → keras_hub-0.23.0.dev0.dist-info}/WHEEL +0 -0
  118. {keras_hub-0.22.1.dist-info → keras_hub-0.23.0.dev0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,139 @@
1
+ import numpy as np
2
+
3
+ from keras_hub.src.models.smollm3.smollm3_backbone import SmolLM3Backbone
4
+ from keras_hub.src.utils.preset_utils import load_json
5
+
6
+ backbone_cls = SmolLM3Backbone
7
+
8
+
9
+ def convert_backbone_config(transformers_config):
10
+ return {
11
+ "vocabulary_size": transformers_config["vocab_size"],
12
+ "hidden_dim": transformers_config["hidden_size"],
13
+ "num_layers": transformers_config["num_hidden_layers"],
14
+ "num_attention_heads": transformers_config["num_attention_heads"],
15
+ "num_key_value_heads": transformers_config["num_key_value_heads"],
16
+ "intermediate_dim": transformers_config["intermediate_size"],
17
+ "layer_norm_epsilon": transformers_config[
18
+ "rms_norm_eps"
19
+ ], # Using rms_norm_eps as layer_norm_epsilon
20
+ "max_position_embeddings": transformers_config[
21
+ "max_position_embeddings"
22
+ ],
23
+ "rope_theta": transformers_config["rope_theta"],
24
+ # partial_rotary_factor is not explicitly in config.json
25
+ # but is inherited from the default value in the
26
+ # `_compute_default_rope_parameters()` function
27
+ "partial_rotary_factor": 1.0,
28
+ "attention_bias": transformers_config["attention_bias"],
29
+ "attention_dropout": transformers_config["attention_dropout"],
30
+ # Despite the name, no_rope_layers: 1 = HAS RoPE, 0 = NO RoPE
31
+ "rope_layer_enabled_list": [
32
+ bool(x) for x in transformers_config["no_rope_layers"]
33
+ ],
34
+ "layer_types": transformers_config["layer_types"],
35
+ "mlp_bias": transformers_config["mlp_bias"],
36
+ }
37
+
38
+
39
+ def convert_weights(backbone, loader, transformers_config):
40
+ loader.port_weight(
41
+ keras_variable=backbone.get_layer("token_embedding").embeddings,
42
+ hf_weight_key="model.embed_tokens.weight",
43
+ )
44
+
45
+ def transpose_and_reshape(x, shape):
46
+ return np.reshape(np.transpose(x), shape)
47
+
48
+ for i in range(backbone.num_layers):
49
+ decoder_layer = backbone.get_layer(f"transformer_layer_{i}")
50
+
51
+ # Input layernorm
52
+ loader.port_weight(
53
+ keras_variable=decoder_layer.input_layernorm.scale,
54
+ hf_weight_key=f"model.layers.{i}.input_layernorm.weight",
55
+ )
56
+
57
+ # Attention layers
58
+ ## Query
59
+ loader.port_weight(
60
+ keras_variable=decoder_layer.self_attn.q_proj.kernel,
61
+ hf_weight_key=f"model.layers.{i}.self_attn.q_proj.weight",
62
+ hook_fn=transpose_and_reshape,
63
+ )
64
+ ## Key
65
+ loader.port_weight(
66
+ keras_variable=decoder_layer.self_attn.k_proj.kernel,
67
+ hf_weight_key=f"model.layers.{i}.self_attn.k_proj.weight",
68
+ hook_fn=transpose_and_reshape,
69
+ )
70
+ ## Value
71
+ loader.port_weight(
72
+ keras_variable=decoder_layer.self_attn.v_proj.kernel,
73
+ hf_weight_key=f"model.layers.{i}.self_attn.v_proj.weight",
74
+ hook_fn=transpose_and_reshape,
75
+ )
76
+ ## Output
77
+ loader.port_weight(
78
+ keras_variable=decoder_layer.self_attn.o_proj.kernel,
79
+ hf_weight_key=f"model.layers.{i}.self_attn.o_proj.weight",
80
+ hook_fn=transpose_and_reshape,
81
+ )
82
+
83
+ # MLP layers
84
+ loader.port_weight(
85
+ keras_variable=decoder_layer.mlp.up_proj.kernel,
86
+ hf_weight_key=f"model.layers.{i}.mlp.up_proj.weight",
87
+ # rearrange_patterns="b a -> a b",
88
+ hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
89
+ )
90
+ loader.port_weight(
91
+ keras_variable=decoder_layer.mlp.down_proj.kernel,
92
+ hf_weight_key=f"model.layers.{i}.mlp.down_proj.weight",
93
+ # rearrange_patterns="b a -> a b",
94
+ hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
95
+ )
96
+ loader.port_weight(
97
+ keras_variable=decoder_layer.mlp.gate_proj.kernel,
98
+ hf_weight_key=f"model.layers.{i}.mlp.gate_proj.weight",
99
+ # rearrange_patterns="b a -> a b",
100
+ hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
101
+ )
102
+
103
+ # Feedforward layernorm
104
+ loader.port_weight(
105
+ keras_variable=decoder_layer.post_attention_layernorm.scale,
106
+ hf_weight_key=f"model.layers.{i}.post_attention_layernorm.weight",
107
+ )
108
+
109
+ # Final normalization layer
110
+ loader.port_weight(
111
+ keras_variable=backbone.get_layer("sequence_output_layernorm").scale,
112
+ hf_weight_key="model.norm.weight",
113
+ )
114
+
115
+ backbone.training = False
116
+
117
+ return backbone
118
+
119
+
120
+ def convert_tokenizer(cls, preset, **kwargs):
121
+ tokenizer_config = load_json(preset, "tokenizer.json")
122
+ vocab = tokenizer_config["model"]["vocab"]
123
+ merges = tokenizer_config["model"]["merges"]
124
+ merges = [" ".join(item) for item in merges]
125
+
126
+ # Load all special tokens with the exception of "reserved" ones.
127
+ special_tokens = set()
128
+ for token in tokenizer_config["added_tokens"]:
129
+ if not token["content"].startswith("<|reserved_special_token_"):
130
+ vocab[token["content"]] = token["id"]
131
+ special_tokens.add(token["content"])
132
+
133
+ kwargs.update(
134
+ {
135
+ "unsplittable_tokens": list(special_tokens),
136
+ }
137
+ )
138
+
139
+ return cls(vocabulary=vocab, merges=merges, **kwargs)
@@ -0,0 +1,229 @@
1
+ from keras_hub.src.models.t5gemma.t5gemma_backbone import T5GemmaBackbone
2
+ from keras_hub.src.utils.preset_utils import get_file
3
+
4
+ backbone_cls = T5GemmaBackbone
5
+
6
+
7
+ def convert_backbone_config(transformers_config):
8
+ """Convert a Hugging Face T5Gemma config to a KerasHub backbone config."""
9
+ encoder_config = transformers_config["encoder"]
10
+ decoder_config = transformers_config["decoder"]
11
+
12
+ if decoder_config.get("hidden_activation") == "gelu_pytorch_tanh":
13
+ decoder_config["hidden_activation"] = "gelu_approximate"
14
+ if encoder_config.get("hidden_activation") == "gelu_pytorch_tanh":
15
+ encoder_config["hidden_activation"] = "gelu_approximate"
16
+
17
+ backbone_config = {
18
+ "vocabulary_size": decoder_config["vocab_size"],
19
+ "encoder_hidden_dim": encoder_config["hidden_size"],
20
+ "encoder_intermediate_dim": encoder_config["intermediate_size"],
21
+ "encoder_num_layers": encoder_config["num_hidden_layers"],
22
+ "encoder_num_attention_heads": encoder_config["num_attention_heads"],
23
+ "encoder_num_key_value_heads": encoder_config["num_key_value_heads"],
24
+ "encoder_head_dim": encoder_config["head_dim"],
25
+ "encoder_layer_types": encoder_config["layer_types"],
26
+ "decoder_hidden_dim": decoder_config["hidden_size"],
27
+ "decoder_intermediate_dim": decoder_config["intermediate_size"],
28
+ "decoder_num_layers": decoder_config["num_hidden_layers"],
29
+ "decoder_num_attention_heads": decoder_config["num_attention_heads"],
30
+ "decoder_num_key_value_heads": decoder_config["num_key_value_heads"],
31
+ "decoder_head_dim": decoder_config["head_dim"],
32
+ "decoder_layer_types": decoder_config["layer_types"],
33
+ "dropout_rate": decoder_config["dropout_rate"],
34
+ "rms_norm_eps": decoder_config["rms_norm_eps"],
35
+ "query_pre_attn_scalar": decoder_config["query_pre_attn_scalar"],
36
+ "tie_word_embeddings": transformers_config.get(
37
+ "tie_word_embeddings", True
38
+ ),
39
+ "attention_bias": decoder_config["attention_bias"],
40
+ "hidden_activation": decoder_config["hidden_activation"],
41
+ "initializer_range": decoder_config["initializer_range"],
42
+ "attention_dropout": decoder_config["attention_dropout"],
43
+ "sliding_window": decoder_config["sliding_window"],
44
+ "cross_attention_hidden_size": encoder_config["hidden_size"],
45
+ "attn_logit_softcapping": decoder_config["attn_logit_softcapping"],
46
+ "final_logit_softcapping": decoder_config["final_logit_softcapping"],
47
+ "rope_max_wavelength": decoder_config["rope_theta"],
48
+ }
49
+ return backbone_config
50
+
51
+
52
+ def convert_weights(backbone, loader, transformers_config):
53
+ """Convert T5Gemma from Hugging Face to KerasHub."""
54
+ # Token embeddings.
55
+ loader.port_weight(
56
+ keras_variable=backbone.token_embedding.embeddings,
57
+ hf_weight_key="encoder.embed_tokens.weight",
58
+ )
59
+ loader.port_weight(
60
+ keras_variable=backbone.decoder_token_embedding.embeddings,
61
+ hf_weight_key="decoder.embed_tokens.weight",
62
+ )
63
+
64
+ # Encoder.
65
+ loader.port_weight(
66
+ keras_variable=backbone.encoder_norm.scale,
67
+ hf_weight_key="encoder.norm.weight",
68
+ )
69
+ for i in range(backbone.encoder_num_layers):
70
+ layer = backbone.get_layer(f"encoder_layer_{i}")
71
+ hf_prefix = f"encoder.layers.{i}"
72
+
73
+ # Self-attention.
74
+ loader.port_weight(
75
+ keras_variable=layer.self_attn.query_dense.kernel,
76
+ hf_weight_key=f"{hf_prefix}.self_attn.q_proj.weight",
77
+ hook_fn=lambda w, s: w.T.reshape(s),
78
+ )
79
+ loader.port_weight(
80
+ keras_variable=layer.self_attn.key_dense.kernel,
81
+ hf_weight_key=f"{hf_prefix}.self_attn.k_proj.weight",
82
+ hook_fn=lambda w, s: w.T.reshape(s),
83
+ )
84
+ loader.port_weight(
85
+ keras_variable=layer.self_attn.value_dense.kernel,
86
+ hf_weight_key=f"{hf_prefix}.self_attn.v_proj.weight",
87
+ hook_fn=lambda w, s: w.T.reshape(s),
88
+ )
89
+ loader.port_weight(
90
+ keras_variable=layer.self_attn.output_dense.kernel,
91
+ hf_weight_key=f"{hf_prefix}.self_attn.o_proj.weight",
92
+ hook_fn=lambda w, s: w.T.reshape(s),
93
+ )
94
+
95
+ # MLP.
96
+ loader.port_weight(
97
+ keras_variable=layer.mlp.gate_proj.kernel,
98
+ hf_weight_key=f"{hf_prefix}.mlp.gate_proj.weight",
99
+ hook_fn=lambda w, s: w.T,
100
+ )
101
+ loader.port_weight(
102
+ keras_variable=layer.mlp.up_proj.kernel,
103
+ hf_weight_key=f"{hf_prefix}.mlp.up_proj.weight",
104
+ hook_fn=lambda w, s: w.T,
105
+ )
106
+ loader.port_weight(
107
+ keras_variable=layer.mlp.down_proj.kernel,
108
+ hf_weight_key=f"{hf_prefix}.mlp.down_proj.weight",
109
+ hook_fn=lambda w, s: w.T,
110
+ )
111
+
112
+ # Layer norm.
113
+ loader.port_weight(
114
+ keras_variable=layer.pre_self_attn_layernorm.scale,
115
+ hf_weight_key=f"{hf_prefix}.pre_self_attn_layernorm.weight",
116
+ )
117
+ loader.port_weight(
118
+ keras_variable=layer.post_self_attn_layernorm.scale,
119
+ hf_weight_key=f"{hf_prefix}.post_self_attn_layernorm.weight",
120
+ )
121
+ loader.port_weight(
122
+ keras_variable=layer.pre_feedforward_layernorm.scale,
123
+ hf_weight_key=f"{hf_prefix}.pre_feedforward_layernorm.weight",
124
+ )
125
+ loader.port_weight(
126
+ keras_variable=layer.post_feedforward_layernorm.scale,
127
+ hf_weight_key=f"{hf_prefix}.post_feedforward_layernorm.weight",
128
+ )
129
+
130
+ # Decoder.
131
+ loader.port_weight(
132
+ keras_variable=backbone.decoder_norm.scale,
133
+ hf_weight_key="decoder.norm.weight",
134
+ )
135
+ for i in range(backbone.decoder_num_layers):
136
+ layer = backbone.get_layer(f"decoder_layer_{i}")
137
+ hf_prefix = f"decoder.layers.{i}"
138
+
139
+ # Self-attention.
140
+ loader.port_weight(
141
+ keras_variable=layer.self_attn.query_dense.kernel,
142
+ hf_weight_key=f"{hf_prefix}.self_attn.q_proj.weight",
143
+ hook_fn=lambda w, s: w.T.reshape(s),
144
+ )
145
+ loader.port_weight(
146
+ keras_variable=layer.self_attn.key_dense.kernel,
147
+ hf_weight_key=f"{hf_prefix}.self_attn.k_proj.weight",
148
+ hook_fn=lambda w, s: w.T.reshape(s),
149
+ )
150
+ loader.port_weight(
151
+ keras_variable=layer.self_attn.value_dense.kernel,
152
+ hf_weight_key=f"{hf_prefix}.self_attn.v_proj.weight",
153
+ hook_fn=lambda w, s: w.T.reshape(s),
154
+ )
155
+ loader.port_weight(
156
+ keras_variable=layer.self_attn.output_dense.kernel,
157
+ hf_weight_key=f"{hf_prefix}.self_attn.o_proj.weight",
158
+ hook_fn=lambda w, s: w.T.reshape(s),
159
+ )
160
+
161
+ # Cross-attention.
162
+ loader.port_weight(
163
+ keras_variable=layer.cross_attn.query_dense.kernel,
164
+ hf_weight_key=f"{hf_prefix}.cross_attn.q_proj.weight",
165
+ hook_fn=lambda w, s: w.T.reshape(s),
166
+ )
167
+ loader.port_weight(
168
+ keras_variable=layer.cross_attn.key_dense.kernel,
169
+ hf_weight_key=f"{hf_prefix}.cross_attn.k_proj.weight",
170
+ hook_fn=lambda w, s: w.T.reshape(s),
171
+ )
172
+ loader.port_weight(
173
+ keras_variable=layer.cross_attn.value_dense.kernel,
174
+ hf_weight_key=f"{hf_prefix}.cross_attn.v_proj.weight",
175
+ hook_fn=lambda w, s: w.T.reshape(s),
176
+ )
177
+ loader.port_weight(
178
+ keras_variable=layer.cross_attn.output_dense.kernel,
179
+ hf_weight_key=f"{hf_prefix}.cross_attn.o_proj.weight",
180
+ hook_fn=lambda w, s: w.T.reshape(s),
181
+ )
182
+
183
+ # MLP.
184
+ loader.port_weight(
185
+ keras_variable=layer.mlp.gate_proj.kernel,
186
+ hf_weight_key=f"{hf_prefix}.mlp.gate_proj.weight",
187
+ hook_fn=lambda w, s: w.T,
188
+ )
189
+ loader.port_weight(
190
+ keras_variable=layer.mlp.up_proj.kernel,
191
+ hf_weight_key=f"{hf_prefix}.mlp.up_proj.weight",
192
+ hook_fn=lambda w, s: w.T,
193
+ )
194
+ loader.port_weight(
195
+ keras_variable=layer.mlp.down_proj.kernel,
196
+ hf_weight_key=f"{hf_prefix}.mlp.down_proj.weight",
197
+ hook_fn=lambda w, s: w.T,
198
+ )
199
+
200
+ # Layer norm.
201
+ loader.port_weight(
202
+ keras_variable=layer.pre_self_attn_layernorm.scale,
203
+ hf_weight_key=f"{hf_prefix}.pre_self_attn_layernorm.weight",
204
+ )
205
+ loader.port_weight(
206
+ keras_variable=layer.post_self_attn_layernorm.scale,
207
+ hf_weight_key=f"{hf_prefix}.post_self_attn_layernorm.weight",
208
+ )
209
+ loader.port_weight(
210
+ keras_variable=layer.pre_cross_attn_layernorm.scale,
211
+ hf_weight_key=f"{hf_prefix}.pre_cross_attn_layernorm.weight",
212
+ )
213
+ loader.port_weight(
214
+ keras_variable=layer.post_cross_attn_layernorm.scale,
215
+ hf_weight_key=f"{hf_prefix}.post_cross_attn_layernorm.weight",
216
+ )
217
+ loader.port_weight(
218
+ keras_variable=layer.pre_feedforward_layernorm.scale,
219
+ hf_weight_key=f"{hf_prefix}.pre_feedforward_layernorm.weight",
220
+ )
221
+ loader.port_weight(
222
+ keras_variable=layer.post_feedforward_layernorm.scale,
223
+ hf_weight_key=f"{hf_prefix}.post_feedforward_layernorm.weight",
224
+ )
225
+
226
+
227
+ def convert_tokenizer(cls, preset, **kwargs):
228
+ """Convert a T5Gemma tokenizer."""
229
+ return cls(get_file(preset, "tokenizer.model"), **kwargs)
@@ -9,7 +9,10 @@ def convert_backbone_config(transformers_config):
9
9
  image_size = transformers_config["image_size"]
10
10
  return {
11
11
  "image_shape": (image_size, image_size, 3),
12
- "patch_size": transformers_config["patch_size"],
12
+ "patch_size": (
13
+ transformers_config["patch_size"],
14
+ transformers_config["patch_size"],
15
+ ),
13
16
  "num_layers": transformers_config["num_hidden_layers"],
14
17
  "num_heads": transformers_config["num_attention_heads"],
15
18
  "hidden_dim": transformers_config["hidden_size"],
@@ -2,6 +2,7 @@ import keras.ops as ops
2
2
 
3
3
 
4
4
  def get_gemma_config(backbone):
5
+ token_embedding_layer = backbone.get_layer("token_embedding")
5
6
  hf_config = {
6
7
  "vocab_size": backbone.vocabulary_size,
7
8
  "num_hidden_layers": backbone.num_layers,
@@ -11,11 +12,16 @@ def get_gemma_config(backbone):
11
12
  "intermediate_size": backbone.intermediate_dim // 2,
12
13
  "head_dim": backbone.head_dim,
13
14
  "max_position_embeddings": 8192,
15
+ "tie_word_embeddings": token_embedding_layer.tie_weights,
16
+ "pad_token_id": 0,
17
+ "bos_token_id": 2,
18
+ "eos_token_id": 1,
19
+ "model_type": "gemma",
14
20
  }
15
21
  return hf_config
16
22
 
17
23
 
18
- def get_gemma_weights_map(backbone):
24
+ def get_gemma_weights_map(backbone, include_lm_head=False):
19
25
  weights_dict = {}
20
26
 
21
27
  # Map token embedding
@@ -83,7 +89,46 @@ def get_gemma_weights_map(backbone):
83
89
  "final_normalization"
84
90
  ).weights[0]
85
91
 
86
- # Tie weights, but clone to avoid sharing memory issues
87
- weights_dict["lm_head.weight"] = ops.copy(token_embedding_layer.weights[0])
88
-
92
+ # Map lm_head if embeddings are not tied
93
+ if include_lm_head and not token_embedding_layer.tie_weights:
94
+ weights_dict["lm_head.weight"] = ops.transpose(
95
+ token_embedding_layer.reverse_embeddings
96
+ )
89
97
  return weights_dict
98
+
99
+
100
+ def get_gemma_tokenizer_config(tokenizer):
101
+ tokenizer_config = {
102
+ "tokenizer_class": "GemmaTokenizer",
103
+ "clean_up_tokenization_spaces": False,
104
+ "bos_token": "<bos>",
105
+ "eos_token": "<eos>",
106
+ "pad_token": "<pad>",
107
+ "unk_token": "<unk>",
108
+ "add_bos_token": True,
109
+ "add_eos_token": False,
110
+ "model_max_length": 8192,
111
+ }
112
+ # Add added_tokens_decoder
113
+ added_tokens_decoder = {}
114
+ special_tokens = [
115
+ "<pad>",
116
+ "<bos>",
117
+ "<eos>",
118
+ "<unk>",
119
+ "<start_of_turn>",
120
+ "<end_of_turn>",
121
+ ]
122
+ for token in special_tokens:
123
+ token_id = tokenizer.token_to_id(token)
124
+ if token_id is not None:
125
+ added_tokens_decoder[str(token_id)] = {
126
+ "content": token,
127
+ "special": True,
128
+ "single_word": False,
129
+ "lstrip": False,
130
+ "rstrip": False,
131
+ "normalized": False,
132
+ }
133
+ tokenizer_config["added_tokens_decoder"] = added_tokens_decoder
134
+ return tokenizer_config
@@ -6,58 +6,59 @@ import warnings
6
6
  import keras
7
7
 
8
8
  from keras_hub.src.utils.transformers.export.gemma import get_gemma_config
9
+ from keras_hub.src.utils.transformers.export.gemma import (
10
+ get_gemma_tokenizer_config,
11
+ )
9
12
  from keras_hub.src.utils.transformers.export.gemma import get_gemma_weights_map
10
13
 
11
14
  MODEL_CONFIGS = {
12
15
  "GemmaBackbone": get_gemma_config,
13
- # Add future models here, e.g., "LlamaBackbone": get_llama_config,
16
+ # Add for future models, e.g., "MistralBackbone": get_mistral_config
14
17
  }
15
18
 
16
19
  MODEL_EXPORTERS = {
17
20
  "GemmaBackbone": get_gemma_weights_map,
18
- # Add future models here, e.g., "LlamaBackbone": get_llama_weights_map,
21
+ # Add for future models, e.g., "MistralBackbone": get_mistral_weights_map
19
22
  }
20
23
 
24
+ MODEL_TOKENIZER_CONFIGS = {
25
+ "GemmaTokenizer": get_gemma_tokenizer_config,
26
+ # Add for future models, e.g., "MistralTokenizer":
27
+ # get_mistral_tokenizer_config
28
+ }
21
29
 
22
- def export_to_safetensors(keras_model, path):
23
- """Converts a Keras model to Hugging Face safetensor format.
24
30
 
25
- It does the following:
26
- - Extracts and maps weights from the Keras backbone to safetensors.
27
- - Saves the configuration as 'config.json'.
28
- - Saves weights in 'model.safetensors'.
29
- - Saves tokenizer assets.
31
+ def export_backbone(backbone, path, include_lm_head=False):
32
+ """Export the backbone model to HuggingFace format.
30
33
 
31
34
  Args:
32
- keras_model: The Keras model to convert.
33
- path: str. Path of the directory to which the safetensors file,
34
- config and tokenizer will be saved.
35
+ backbone: The Keras backbone model to convert.
36
+ path: str. Path to save the exported model.
37
+ include_lm_head: bool. If True, include lm_head weights if applicable.
35
38
  """
36
39
  backend = keras.config.backend()
37
- backbone = keras_model.backbone
38
40
  model_type = backbone.__class__.__name__
39
-
40
41
  if model_type not in MODEL_CONFIGS:
41
- raise ValueError(f"Config not implemented for {model_type}")
42
-
42
+ raise ValueError(
43
+ f"Export to Transformers format not implemented for {model_type}"
44
+ )
43
45
  if model_type not in MODEL_EXPORTERS:
44
- raise ValueError(f"Exporter not implemented for {model_type}")
45
-
46
+ raise ValueError(
47
+ f"Export to Transformers format not implemented for {model_type}"
48
+ )
49
+ # Get config
46
50
  get_config_fn = MODEL_CONFIGS[model_type]
47
51
  hf_config = get_config_fn(backbone)
48
-
52
+ # Get weights
49
53
  get_weights_fn = MODEL_EXPORTERS[model_type]
50
- weights_dict = get_weights_fn(backbone)
51
-
54
+ weights_dict = get_weights_fn(backbone, include_lm_head=include_lm_head)
52
55
  if not weights_dict:
53
56
  raise ValueError("No weights to save.")
54
-
55
57
  # Save config
56
58
  os.makedirs(path, exist_ok=True)
57
59
  config_path = os.path.join(path, "config.json")
58
60
  with open(config_path, "w") as f:
59
61
  json.dump(hf_config, f)
60
-
61
62
  # Save weights based on backend
62
63
  weights_path = os.path.join(path, "model.safetensors")
63
64
  if backend == "torch":
@@ -81,9 +82,28 @@ def export_to_safetensors(keras_model, path):
81
82
  else:
82
83
  raise ValueError(f"Unsupported backend: {backend}")
83
84
 
84
- # Save tokenizer assets
85
- keras_model.preprocessor.tokenizer.save_assets(path)
86
85
 
86
+ def export_tokenizer(tokenizer, path):
87
+ """Export only the tokenizer to HuggingFace Transformers format.
88
+
89
+ Args:
90
+ tokenizer: The Keras tokenizer to convert.
91
+ path: str. Path to save the exported tokenizer.
92
+ """
93
+ os.makedirs(path, exist_ok=True)
94
+ # Save tokenizer assets
95
+ tokenizer.save_assets(path)
96
+ # Export tokenizer config
97
+ tokenizer_type = tokenizer.__class__.__name__
98
+ if tokenizer_type not in MODEL_TOKENIZER_CONFIGS:
99
+ raise ValueError(
100
+ f"Export to Transformers format not implemented for {tokenizer_type}"
101
+ )
102
+ get_tokenizer_config_fn = MODEL_TOKENIZER_CONFIGS[tokenizer_type]
103
+ tokenizer_config = get_tokenizer_config_fn(tokenizer)
104
+ tokenizer_config_path = os.path.join(path, "tokenizer_config.json")
105
+ with open(tokenizer_config_path, "w") as f:
106
+ json.dump(tokenizer_config, f, indent=4)
87
107
  # Rename vocabulary file
88
108
  vocab_spm_path = os.path.join(path, "vocabulary.spm")
89
109
  tokenizer_model_path = os.path.join(path, "tokenizer.model")
@@ -96,3 +116,29 @@ def export_to_safetensors(keras_model, path):
96
116
  "is correct and that the vocabulary file is present "
97
117
  "in the original model."
98
118
  )
119
+
120
+
121
+ def export_to_safetensors(keras_model, path):
122
+ """Converts a Keras model to Hugging Face Transformers format.
123
+
124
+ It does the following:
125
+ - Exports the backbone (config and weights).
126
+ - Exports the tokenizer assets.
127
+
128
+ Args:
129
+ keras_model: The Keras model to convert.
130
+ path: str. Path of the directory to which the safetensors file,
131
+ config and tokenizer will be saved.
132
+ """
133
+ backbone = keras_model.backbone
134
+ export_backbone(backbone, path, include_lm_head=True)
135
+ if (
136
+ keras_model.preprocessor is not None
137
+ and keras_model.preprocessor.tokenizer is None
138
+ ):
139
+ raise ValueError(
140
+ "CausalLM preprocessor must have a tokenizer for export "
141
+ "if attached."
142
+ )
143
+ if keras_model.preprocessor is not None:
144
+ export_tokenizer(keras_model.preprocessor.tokenizer, path)
@@ -18,7 +18,10 @@ from keras_hub.src.utils.transformers import convert_mixtral
18
18
  from keras_hub.src.utils.transformers import convert_pali_gemma
19
19
  from keras_hub.src.utils.transformers import convert_qwen
20
20
  from keras_hub.src.utils.transformers import convert_qwen3
21
+ from keras_hub.src.utils.transformers import convert_qwen3_moe
21
22
  from keras_hub.src.utils.transformers import convert_qwen_moe
23
+ from keras_hub.src.utils.transformers import convert_smollm3
24
+ from keras_hub.src.utils.transformers import convert_t5gemma
22
25
  from keras_hub.src.utils.transformers import convert_vit
23
26
  from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader
24
27
 
@@ -60,8 +63,14 @@ class TransformersPresetLoader(PresetLoader):
60
63
  self.converter = convert_mixtral
61
64
  elif model_type == "qwen2_moe":
62
65
  self.converter = convert_qwen_moe
66
+ elif model_type == "qwen3_moe":
67
+ self.converter = convert_qwen3_moe
63
68
  elif model_type == "qwen3":
64
69
  self.converter = convert_qwen3
70
+ elif model_type == "smollm3":
71
+ self.converter = convert_smollm3
72
+ elif model_type == "t5gemma":
73
+ self.converter = convert_t5gemma
65
74
  else:
66
75
  raise ValueError(
67
76
  "KerasHub has no converter for huggingface/transformers models "
keras_hub/src/version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from keras_hub.src.api_export import keras_hub_export
2
2
 
3
3
  # Unique source of truth for the version number.
4
- __version__ = "0.22.1"
4
+ __version__ = "0.23.0.dev0"
5
5
 
6
6
 
7
7
  @keras_hub_export("keras_hub.version")
@@ -66,6 +66,9 @@ from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer as OPTTokenizer
66
66
  from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
67
67
  PaliGemmaTokenizer as PaliGemmaTokenizer,
68
68
  )
69
+ from keras_hub.src.models.parseq.parseq_tokenizer import (
70
+ PARSeqTokenizer as PARSeqTokenizer,
71
+ )
69
72
  from keras_hub.src.models.phi3.phi3_tokenizer import (
70
73
  Phi3Tokenizer as Phi3Tokenizer,
71
74
  )
@@ -75,6 +78,9 @@ from keras_hub.src.models.qwen.qwen_tokenizer import (
75
78
  from keras_hub.src.models.qwen.qwen_tokenizer import (
76
79
  QwenTokenizer as QwenTokenizer,
77
80
  )
81
+ from keras_hub.src.models.qwen3_moe.qwen3_moe_tokenizer import (
82
+ Qwen3MoeTokenizer as Qwen3MoeTokenizer,
83
+ )
78
84
  from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import (
79
85
  QwenMoeTokenizer as QwenMoeTokenizer,
80
86
  )
@@ -87,7 +93,16 @@ from keras_hub.src.models.roformer_v2.roformer_v2_tokenizer import (
87
93
  from keras_hub.src.models.siglip.siglip_tokenizer import (
88
94
  SigLIPTokenizer as SigLIPTokenizer,
89
95
  )
96
+ from keras_hub.src.models.smollm3.smollm3_tokenizer import (
97
+ SmolLM3Tokenizer as SmolLM3Tokenizer,
98
+ )
99
+ from keras_hub.src.models.smollm3.smollm3_tokenizer import (
100
+ SmolLM3Tokenizer as SmolLMTokenizer,
101
+ )
90
102
  from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
103
+ from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
104
+ T5GemmaTokenizer as T5GemmaTokenizer,
105
+ )
91
106
  from keras_hub.src.models.whisper.whisper_tokenizer import (
92
107
  WhisperTokenizer as WhisperTokenizer,
93
108
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: keras-hub
3
- Version: 0.22.1
3
+ Version: 0.23.0.dev0
4
4
  Summary: Pretrained models for Keras.
5
5
  Author-email: Keras team <keras-users@googlegroups.com>
6
6
  License-Expression: Apache-2.0