keras-hub-nightly 0.24.0.dev202511090424__py3-none-any.whl → 0.24.0.dev202512090431__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of keras-hub-nightly has been flagged as potentially problematic.

keras_hub/src/models/causal_lm.py CHANGED
@@ -429,3 +429,25 @@ class CausalLM(Task):
         super()._post_quantize(mode, **kwargs)
         # Reset the compiled generate function.
         self.generate_function = None
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        # Check for standard backbone structure.
+        if not hasattr(backbone, "transformer_layers"):
+            return None
+
+        # Check for embedding.
+        embedding = getattr(backbone, "token_embedding", None)
+        if embedding is None:
+            embedding = getattr(backbone, "embedding", None)
+
+        if embedding is None:
+            return None
+
+        return {
+            "pre_block_layers": [embedding],
+            "sequential_blocks": backbone.transformer_layers,
+        }
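
The new hook gives GPTQ a uniform view of a decoder model: the layers to run before calibration ("pre_block_layers") and the per-layer blocks to quantize one at a time ("sequential_blocks"). A minimal sketch of inspecting it, assuming a GPT-2 preset has been downloaded (the preset name and print statements are illustrative, not part of the diff):

    import keras_hub

    causal_lm = keras_hub.models.GPT2CausalLM.from_preset("gpt2_base_en")
    structure = causal_lm.get_quantization_layer_structure("gptq")
    if structure is not None:
        # Layers applied to token ids before the first transformer block.
        print(structure["pre_block_layers"])
        # One entry per transformer block, quantized sequentially by GPTQ.
        print(len(structure["sequential_blocks"]))
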
keras_hub/src/models/dinov3/dinov3_presets.py CHANGED
@@ -1,4 +1,93 @@
 """DINOV3 model preset configurations."""
 
 # Metadata for loading pretrained model weights.
-backbone_presets = {}
+backbone_presets = {
+    "dinov3_vit_small_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (small-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 21_600_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_small_lvd1689m/1",
+    },
+    "dinov3_vit_small_plus_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (small-plus-sized model) trained on "
+                "LVD-1689M using DINOv3."
+            ),
+            "params": 29_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_small_plus_lvd1689m/1",
+    },
+    "dinov3_vit_base_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (base-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 86_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_base_lvd1689m/1",
+    },
+    "dinov3_vit_large_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (large-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 300_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_large_lvd1689m/1",
+    },
+    "dinov3_vit_huge_plus_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (huge-plus-sized model) trained on "
+                "LVD-1689M using DINOv3."
+            ),
+            "params": 840_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_huge_plus_lvd1689m/1",
+    },
+    "dinov3_vit_7b_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (7B-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 6_700_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_7b_lvd1689m/1",
+    },
+    "dinov3_vit_large_sat493m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (large-sized model) trained on SAT-493M "
+                "using DINOv3."
+            ),
+            "params": 300_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_large_sat493m/1",
+    },
+    "dinov3_vit_7b_sat493m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (7B-sized model) trained on SAT-493M "
+                "using DINOv3."
+            ),
+            "params": 6_700_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_7b_sat493m/1",
+    },
+}
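
With the preset table populated, any of these checkpoints can be pulled by name through the generic backbone loader. A short sketch, assuming Kaggle access for the weight download:

    import keras_hub

    # Resolves to the DINOv3 backbone class registered for this preset.
    backbone = keras_hub.models.Backbone.from_preset("dinov3_vit_small_lvd1689m")
    backbone.summary()
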
keras_hub/src/models/esm/esm_attention.py CHANGED
@@ -14,7 +14,8 @@ class ESMRotaryEmbedding(RotaryEmbedding):
         inv_freq = self.scaling_factor / (
             self.max_wavelength ** (ops.arange(0, dim, 2, dtype=x.dtype) / dim)
         )
-        t = ops.arange(x.shape[position], dtype=x.dtype)
+        # Use ops.shape for dynamic shape compatibility with TFLite
+        t = ops.arange(ops.shape(x)[position], dtype=x.dtype)
         freqs = ops.outer(t, inv_freq)
         emb = ops.concatenate((freqs, freqs), axis=-1)
 
@@ -32,11 +33,17 @@ class ESMRotaryEmbedding(RotaryEmbedding):
 
     def rotate_half(self, x):
         x1, x2 = ops.split(x, 2, -1)
-        return ops.concatenate((-x2, x1), axis=-1)
+        # Avoid `ops.concatenate` to prevent XLA compilation issues on JAX
+        # backend. Use stack + reshape approach from base RotaryEmbedding.
+        half_rot_x = ops.stack((-x2, x1), axis=-2)
+        half_rot_x = ops.reshape(half_rot_x, ops.shape(x))
+        return half_rot_x
 
     def apply_rotary_pos_emb(self, x, cos, sin):
-        cos = cos[:, : x.shape[1], :, :]
-        sin = sin[:, : x.shape[1], :, :]
+        # Use ops.shape for dynamic shape compatibility with TFLite
+        seq_len = ops.shape(x)[1]
+        cos = cos[:, :seq_len, :, :]
+        sin = sin[:, :seq_len, :, :]
 
         return (x * cos) + (self.rotate_half(x) * sin)
 
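
Both ESM changes swap static x.shape lookups for ops.shape so the sequence length stays a traced value when the graph is exported with dynamic shapes (the TFLite case called out in the comments). The stack-and-reshape rewrite of rotate_half is value-equivalent to the old concatenate form; a standalone check, assuming any Keras 3 backend:

    import numpy as np
    from keras import ops

    x = np.arange(24, dtype="float32").reshape(2, 3, 4)
    x1, x2 = ops.split(x, 2, -1)

    # Original implementation: concatenate the negated second half in front.
    old = ops.concatenate((-x2, x1), axis=-1)
    # New implementation: stack along a new axis, then flatten back to x's shape.
    new = ops.reshape(ops.stack((-x2, x1), axis=-2), ops.shape(x))

    np.testing.assert_allclose(
        ops.convert_to_numpy(old), ops.convert_to_numpy(new)
    )
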
keras_hub/src/models/gemma/gemma_causal_lm.py CHANGED
@@ -431,3 +431,19 @@ class GemmaCausalLM(CausalLM):
         )
         per_token_loss = per_token_loss_fn(target_ids, logits)
         return per_token_loss
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        # Wrap embedding + scaling
+        backbone = self.backbone
+        inputs = keras.Input(shape=(None,), dtype="int32")
+        x = backbone.token_embedding(inputs)
+        x = x * ops.cast(ops.sqrt(backbone.hidden_dim), x.dtype)
+        pre_processor = keras.Model(inputs=inputs, outputs=x)
+
+        return {
+            "pre_block_layers": [pre_processor],
+            "sequential_blocks": backbone.transformer_layers,
+        }
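
Note that Gemma cannot hand GPTQ the raw embedding layer: the backbone multiplies embeddings by sqrt(hidden_dim) before the first decoder block, so this override wraps the embedding plus that scaling in a small functional model, keeping calibration inputs faithful to the real forward pass.
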
keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py CHANGED
@@ -283,9 +283,14 @@ class Gemma3CausalLMPreprocessor(CausalLMPreprocessor):
         # is `None`.
         self.text_only_model = self.image_converter is None
 
-        self.image_placeholder = self.tokenizer.image_placeholder
-        self.start_of_image_token = self.tokenizer.start_of_image_token
-        self.end_of_image_token = self.tokenizer.end_of_image_token
+        if self.text_only_model:
+            self.image_placeholder = None
+            self.start_of_image_token = None
+            self.end_of_image_token = None
+        else:
+            self.image_placeholder = self.tokenizer.image_placeholder
+            self.start_of_image_token = self.tokenizer.start_of_image_token
+            self.end_of_image_token = self.tokenizer.end_of_image_token
 
     def build(self, input_shape):
         # Defer packer creation to `build()` so that we can be sure tokenizer
keras_hub/src/models/gemma3/gemma3_tokenizer.py CHANGED
@@ -77,20 +77,32 @@ class Gemma3Tokenizer(SentencePieceTokenizer):
 
     backbone_cls = Gemma3Backbone
 
-    def __init__(self, proto, **kwargs):
+    def __init__(self, proto, has_vision_tokens=True, **kwargs):
         # Add special tokens.
 
+        self.has_vision_tokens = has_vision_tokens
         # The usual tokens.
         self._add_special_token("<bos>", "start_token")
         self._add_special_token("<eos>", "end_token")
         self._add_special_token("<pad>", "pad_token")
 
-        # Image placeholder token.
-        self._add_special_token("<img>", "image_placeholder")
-
-        # Some tokens which are used in the preprocessor. We need to keep them
-        # here so that the preprocessor works with `tf.data`.
-        self._add_special_token("<start_of_image>", "start_of_image_token")
-        self._add_special_token("<end_of_image>", "end_of_image_token")
+        if has_vision_tokens:
+            # Image placeholder token.
+            self._add_special_token("<img>", "image_placeholder")
+            # Some tokens which are used in the preprocessor.
+            # We need to keep them
+            # here so that the preprocessor works with tf.data.
+            self._add_special_token("<start_of_image>", "start_of_image_token")
+            self._add_special_token("<end_of_image>", "end_of_image_token")
+        else:
+            # For text-only, skip assigning token IDs or set to -1
+            self.start_of_image_token_id = -1
+            self.image_placeholder_token_id = -1
+            self.end_of_image_token_id = -1
 
         super().__init__(proto=proto, **kwargs)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({"has_vision_tokens": self.has_vision_tokens})
+        return config
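
Together with the preprocessor change above, this lets a Gemma 3 tokenizer be built from a text-only SentencePiece vocabulary. A sketch under that assumption (the proto path is a placeholder, not a file shipped with the package):

    import keras_hub

    # Text-only vocabulary: image special tokens are skipped and their ids
    # default to -1, matching the new `__init__` branch.
    tokenizer = keras_hub.tokenizers.Gemma3Tokenizer(
        proto="path/to/text_only_tokenizer.model",
        has_vision_tokens=False,
    )
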
keras_hub/src/models/gpt2/gpt2_causal_lm.py CHANGED
@@ -420,3 +420,20 @@ class GPT2CausalLM(CausalLM):
         )
         per_token_loss = per_token_loss_fn(target_ids, logits)
         return per_token_loss
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        token_ids = keras.Input(shape=(None,), dtype="int32")
+        tokens = backbone.token_embedding(token_ids)
+        positions = backbone.position_embedding(tokens)
+        x = backbone.embeddings_add((tokens, positions))
+        x = backbone.embeddings_dropout(x)
+        pre_processor = keras.Model(inputs=token_ids, outputs=x)
+
+        return {
+            "pre_block_layers": [pre_processor],
+            "sequential_blocks": backbone.transformer_layers,
+        }
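
GPT-2's override follows the same pattern as Gemma's, but its pre-block wrapper also has to run the learned position embedding, the add, and the embedding dropout, since those all execute before the first transformer block in the backbone.
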
keras_hub/src/models/masked_lm.py CHANGED
@@ -84,3 +84,25 @@ class MaskedLM(Task):
             weighted_metrics=weighted_metrics,
             **kwargs,
         )
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        # Check for standard backbone structure.
+        if not hasattr(backbone, "transformer_layers"):
+            return None
+
+        # Check for embedding.
+        embedding = getattr(backbone, "token_embedding", None)
+        if embedding is None:
+            embedding = getattr(backbone, "embedding", None)
+
+        if embedding is None:
+            return None
+
+        return {
+            "pre_block_layers": [embedding],
+            "sequential_blocks": backbone.transformer_layers,
+        }
keras_hub/src/models/qwen3/qwen3_presets.py CHANGED
@@ -70,4 +70,40 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_32b_en/1",
     },
+    "qwen3_embedding_0.6b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 1024."
+            ),
+            "params": 595776512,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_0.6b_en/1",
+    },
+    "qwen3_embedding_4b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 2560."
+            ),
+            "params": 4021774336,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_4b_en/1",
+    },
+    "qwen3_embedding_8b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 4096."
+            ),
+            "params": 8188515328,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_8b_en/1",
+    },
 }
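
These embedding checkpoints live in the same backbone_presets table as the Qwen3 causal-LM presets, so they resolve through the same from_preset path; only the Kaggle handles (keras/qwen-3-embedding) and parameter counts differ.
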
keras_hub/src/models/smollm3/__init__.py ADDED
@@ -0,0 +1,5 @@
+from keras_hub.src.models.smollm3.smollm3_backbone import SmolLM3Backbone
+from keras_hub.src.models.smollm3.smollm3_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, SmolLM3Backbone)
keras_hub/src/models/smollm3/smollm3_presets.py ADDED
@@ -0,0 +1,16 @@
+"""SmolLM3 model preset configurations."""
+
+backbone_presets = {
+    "smollm3_3b_en": {
+        "metadata": {
+            "description": (
+                "Dense decoder-only model has 3 billion total parameters, "
+                "built on 36 layers and utilizes 16 query and "
+                "4 key/value attention heads."
+            ),
+            "params": 3075100928,
+            "path": "smollm3",
+        },
+        "kaggle_handle": "kaggle://keras/smollm3/keras/smollm3_3b_en/1",
+    },
+}
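
The register_presets(backbone_presets, SmolLM3Backbone) call in the new __init__.py is what makes "smollm3_3b_en" resolvable by name at import time; without it the preset file would ship with the wheel but never be registered.
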
keras_hub/src/utils/transformers/convert_gemma3.py ADDED
@@ -0,0 +1,353 @@
+import numpy as np
+from sentencepiece import SentencePieceProcessor
+
+from keras_hub.src.models.gemma3.gemma3_backbone import Gemma3Backbone
+from keras_hub.src.models.gemma3.gemma3_vision_encoder import (
+    Gemma3VisionEncoder,
+)
+from keras_hub.src.utils.preset_utils import get_file
+from keras_hub.src.utils.preset_utils import load_json
+
+backbone_cls = Gemma3Backbone
+
+
+def load_image_converter_config(preset, transformers_config):
+    if "vision_config" in transformers_config:
+        preprocessor_config = load_json(preset, "preprocessor_config.json")
+        mean = preprocessor_config["image_mean"]
+        std = preprocessor_config["image_std"]
+        rescale_factor = preprocessor_config["rescale_factor"]
+        offset = [(-m / s) for m, s in zip(mean, std)]
+        scale = [(s * rescale_factor) for s in std]
+        image_size = transformers_config["vision_config"].get("image_size", 224)
+        return {
+            "image_size": (image_size, image_size),
+            "scale": scale,
+            "offset": offset,
+        }
+    else:
+        return None
+
+
+def convert_backbone_config(transformers_config):
+    if transformers_config["model_type"] == "gemma3_text":
+        image_size = None
+        vision_encoder = None
+        transformer_config = transformers_config
+    else:
+        vision_config = transformers_config["vision_config"]
+        image_size = vision_config["image_size"]
+        vision_encoder_config = {
+            "image_size": image_size,
+            "patch_size": vision_config["patch_size"],
+            "num_heads": vision_config["num_attention_heads"],
+            "hidden_dim": vision_config["hidden_size"],
+            "num_layers": vision_config["num_hidden_layers"],
+            "intermediate_dim": vision_config["intermediate_size"],
+            "output_dim": 2560,
+            "pool_size": 4,
+            "layer_norm_epsilon": vision_config.get("layer_norm_eps", 1e-6),
+        }
+        vision_encoder = Gemma3VisionEncoder(**vision_encoder_config)
+        transformer_config = transformers_config["text_config"]
+
+    if "rope_parameters" in transformer_config:
+        rope_global_config = transformer_config.get("rope_parameters", {}).get(
+            "full_attention"
+        )
+    elif "rope_scaling" in transformer_config:
+        rope_global_config = transformer_config["rope_scaling"]
+    else:
+        rope_global_config = {}
+    return {
+        "vocabulary_size": transformer_config.get(
+            "vocab_size", 262144 if vision_encoder is None else 262208
+        ),
+        "image_size": image_size,
+        "num_layers": transformer_config["num_hidden_layers"],
+        "num_query_heads": transformer_config.get("num_attention_heads", 8),
+        "num_key_value_heads": transformer_config.get("num_key_value_heads", 4),
+        "hidden_dim": transformer_config["hidden_size"],
+        "intermediate_dim": transformer_config["intermediate_size"],
+        "head_dim": transformer_config["head_dim"],
+        "use_post_ffw_norm": True,
+        "use_post_attention_norm": True,
+        "attention_logit_softcap": transformer_config.get(
+            "attn_logit_softcap", None
+        ),
+        "final_logit_softcap": transformer_config.get(
+            "final_logit_softcap", None
+        ),
+        "use_sliding_window_attention": True,
+        "query_head_dim_normalize": True,
+        "sliding_window_size": transformer_config["sliding_window"],
+        "local_rope_scaling_factor": 1.0,
+        "global_rope_scaling_factor": (
+            rope_global_config.get("factor", 1.0) if rope_global_config else 1.0
+        ),
+        "layer_norm_epsilon": transformer_config.get("rms_norm_eps", 1e-6),
+        "use_bidirectional_attention": transformer_config.get(
+            "use_bidirectional_attention", False
+        ),
+        "vision_encoder": vision_encoder,
+    }
+
+
+def convert_weights(backbone, loader, transformers_config):
+    if transformers_config["model_type"] == "gemma3_text":
+        prefix = "model"
+    else:
+        prefix = "language_model.model"
+
+    loader.port_weight(
+        keras_variable=backbone.get_layer("token_embedding").embeddings,
+        hf_weight_key=f"{prefix}.embed_tokens.weight",
+    )
+
+    def transpose(x, shape):
+        return np.transpose(x)
+
+    vision_encoder = backbone.vision_encoder
+    if vision_encoder is not None:
+        image_encoder = vision_encoder.get_layer("image_encoder")
+
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.patch_embedding.kernel,
+            hf_weight_key="vision_tower.vision_model.embeddings.patch_embedding.weight",
+            hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.patch_embedding.bias,
+            hf_weight_key="vision_tower.vision_model.embeddings.patch_embedding.bias",
+        )
+
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.position_embedding.embeddings,
+            hf_weight_key="vision_tower.vision_model.embeddings.position_embedding.weight",
+        )
+
+        for i in range(image_encoder.num_layers):
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_1.gamma,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm1.weight",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_1.beta,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm1.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[
+                    i
+                ].attn.query_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.q_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.query_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.q_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.key_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.k_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.key_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.k_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[
+                    i
+                ].attn.value_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.v_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.value_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.v_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.out_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.out_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.out_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.out_proj.bias",
+            )
+
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_2.gamma,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm2.weight",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_2.beta,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm2.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_1.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc1.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_1.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc1.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_2.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc2.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_2.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc2.bias",
+            )
+
+        loader.port_weight(
+            keras_variable=image_encoder.encoder_layer_norm.gamma,
+            hf_weight_key="vision_tower.vision_model.post_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=image_encoder.encoder_layer_norm.beta,
+            hf_weight_key="vision_tower.vision_model.post_layernorm.bias",
+        )
+
+        loader.port_weight(
+            keras_variable=vision_encoder.get_layer(
+                "vision_output_encoder"
+            ).vision_soft_embedding_norm.scale,
+            hf_weight_key="multi_modal_projector.mm_soft_emb_norm.weight",
+        )
+
+        loader.port_weight(
+            keras_variable=vision_encoder.get_layer(
+                "vision_output_encoder"
+            ).vision_input_projection.kernel,
+            hf_weight_key="multi_modal_projector.mm_input_projection_weight",
+        )
+
+    for i in range(backbone.num_layers):
+        decoder_layer = backbone.get_layer(f"decoder_block_{i}")
+
+        loader.port_weight(
+            keras_variable=decoder_layer.pre_attention_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.input_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.post_attention_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.post_attention_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.pre_ffw_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.pre_feedforward_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.post_ffw_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.post_feedforward_layernorm.weight",
+        )
+
+        # Attention layers
+
+        ## Query
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.query_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.q_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.query_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.q_norm.weight",
+        )
+        ## Key
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.key_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.k_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.key_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.k_norm.weight",
+        )
+        ## Value
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.value_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.v_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        ## Output
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.output_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.o_proj.weight",
+            # rearrange_patterns="c (a b) -> a b c",
+            # rearrange_dims={"a": backbone.num_query_heads},
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[2], keras_shape[0], keras_shape[1]),
+                ),
+                axes=(1, 2, 0),
+            ),
+        )
+
+        # MLP layers
+        loader.port_weight(
+            keras_variable=decoder_layer.gating_ffw.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.gate_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.gating_ffw_2.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.up_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.ffw_linear.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.down_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+
+    # Final normalization layer
+    loader.port_weight(
+        keras_variable=backbone.get_layer("final_normalization").scale,
+        hf_weight_key=f"{prefix}.norm.weight",
+    )
+
+    return backbone
+
+
+def convert_tokenizer(cls, preset, **kwargs):
+    proto = get_file(preset, "tokenizer.model")
+    sp = SentencePieceProcessor()
+    if isinstance(proto, bytes):
+        sp.LoadFromSerializedProto(proto)
+    else:
+        sp.load(proto)
+
+    has_vision_tokens = (
+        sp.PieceToId("<start_of_image>") != sp.unk_id()
+        and sp.PieceToId("<img>") != sp.unk_id()
+        and sp.PieceToId("<end_of_image>") != sp.unk_id()
+    )
+
+    return cls(proto, has_vision_tokens=has_vision_tokens, **kwargs)
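
This converter is what the Transformers preset loader dispatches to for the "gemma3" and "gemma3_text" model types (see the preset_loader.py hunk below), which is what allows Gemma 3 checkpoints to be pulled straight from a Hugging Face repo. A sketch under the assumption that you have access to a Gemma 3 safetensors checkpoint; the repo id is illustrative:

    import keras_hub

    causal_lm = keras_hub.models.Gemma3CausalLM.from_preset(
        "hf://google/gemma-3-1b-it"
    )
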
keras_hub/src/utils/transformers/preset_loader.py CHANGED
@@ -12,6 +12,7 @@ from keras_hub.src.utils.transformers import convert_dinov3
 from keras_hub.src.utils.transformers import convert_distilbert
 from keras_hub.src.utils.transformers import convert_esm
 from keras_hub.src.utils.transformers import convert_gemma
+from keras_hub.src.utils.transformers import convert_gemma3
 from keras_hub.src.utils.transformers import convert_gpt2
 from keras_hub.src.utils.transformers import convert_llama3
 from keras_hub.src.utils.transformers import convert_mistral
@@ -49,6 +50,8 @@ class TransformersPresetLoader(PresetLoader):
             self.converter = convert_esm
         elif model_type in ("gemma", "gemma2"):
             self.converter = convert_gemma
+        elif model_type in ("gemma3", "gemma3_text"):
+            self.converter = convert_gemma3
         elif model_type == "gpt2":
             self.converter = convert_gpt2
         elif model_type == "llama":
@@ -115,5 +118,11 @@ class TransformersPresetLoader(PresetLoader):
         return self.converter.convert_tokenizer(cls, self.preset, **kwargs)
 
     def load_image_converter(self, cls, **kwargs):
+        if hasattr(self.converter, "load_image_converter_config"):
+            config = self.converter.load_image_converter_config(
+                self.preset, self.config
+            )
+            if config is not None:
+                return cls(**{**config, **kwargs})
         # TODO: set image size for pali gemma checkpoints.
         return None
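
The loader now duck-types on the converter module: if it exposes load_image_converter_config (as the new convert_gemma3 does), the image converter is built from the checkpoint's preprocessor_config.json values; converters without the hook fall through to the old behavior of returning None.
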
keras_hub/src/version.py CHANGED
@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 
 # Unique source of truth for the version number.
-__version__ = "0.24.0.dev202511090424"
+__version__ = "0.24.0.dev202512090431"
 
 
 @keras_hub_export("keras_hub.version")
keras_hub_nightly-*.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: keras-hub-nightly
-Version: 0.24.0.dev202511090424
+Version: 0.24.0.dev202512090431
 Summary: Pretrained models for Keras.
 Author-email: Keras team <keras-users@googlegroups.com>
 License-Expression: Apache-2.0
keras_hub_nightly-*.dist-info/RECORD CHANGED
@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=XGYkwfBVZiPw5ZjSV5S_n3FnkPf06yYNzxZjXMhiX70,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=0oZ2eQ3pK7UNLgYg6OOna2ubpYCdPpH9WrlbvIq-QC0,222
+keras_hub/src/version.py,sha256=RwtmvWeWT3DYRT6L3ZzcKzMVzwl83RHUWlQ1KmiI2nM,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -44,7 +44,7 @@ keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 keras_hub/src/models/audio_to_text.py,sha256=XoOjXtKBX6K1fz-zOXcdVo3FpjuxCMnJZh2LQcYXb_0,2726
 keras_hub/src/models/audio_to_text_preprocessor.py,sha256=GS-WWyJ6aSsPRxi_0bxvxA00h2mT2FEwSdAoQXAUYVI,3249
 keras_hub/src/models/backbone.py,sha256=BdqPsne7lIITIxn6jY6AN4vZ-Rc9VnpqTxvVNR3CS7M,12210
-keras_hub/src/models/causal_lm.py,sha256=x86PTAzoBpAdJyenPRNNBAkazUjcRLr4wb2hMs5SrQ0,18344
+keras_hub/src/models/causal_lm.py,sha256=3WVFo9WAd4ZDu-62X98JMoUw1bqfTlVR0lQPb6vmr1g,18989
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=nxl-sfmCfkfl6JmVRASa878QbaZUgWSA6Jdu48x4-dY,7155
 keras_hub/src/models/depth_estimator.py,sha256=JR7wtunOPrfEoDkLspoZnL2ItWhZFDeAxxw2vue5QLs,8992
 keras_hub/src/models/depth_estimator_preprocessor.py,sha256=2iE8NAUyiD2AvjZwNoXKUaOUogcE1fRzTNXLQ75GZpQ,2822
@@ -55,7 +55,7 @@ keras_hub/src/models/image_segmenter.py,sha256=C1bzIO59pG58iist5GLn_qnlotDpcAVxP
 keras_hub/src/models/image_segmenter_preprocessor.py,sha256=d7I2Hk0SKWyKpjRS6WYccmh_CYQBpWoj0JF5RRrU6rw,3748
 keras_hub/src/models/image_to_image.py,sha256=nblRd-16n5_JxKIH6IJU7bHTFRGxyCpKUilg6VjWuek,16933
 keras_hub/src/models/inpaint.py,sha256=oqdj0Q9dNG54g6sNQ5foto8saPd5Sx8kYZuHCZPBqrY,20995
-keras_hub/src/models/masked_lm.py,sha256=uXO_dE_hILlOC9jNr6oK6IHi9IGUqLyNGvr6nMt8Rk0,3576
+keras_hub/src/models/masked_lm.py,sha256=EPDHfMxyO_pciKuLooR6T5XYRcfvsqgfDXumXkA41No,4221
 keras_hub/src/models/masked_lm_preprocessor.py,sha256=g8vrnyYwqdnSw5xppROM1Gzo_jmMWKYZoQCsKdfrFKk,5656
 keras_hub/src/models/object_detector.py,sha256=oAK42fFBKuN0G_WM-DhygFkgQ0KsEwU_ZiU4umHywqc,3757
 keras_hub/src/models/object_detector_preprocessor.py,sha256=kOSVRNFAg-UjtrCEVBdHXUFyJy7kQtlVuGnZ1aLEfOk,2664
@@ -180,7 +180,7 @@ keras_hub/src/models/dinov3/__init__.py,sha256=AI7vTZJBG6Ygb48o6pXtHzxKk0Rek3p7-
 keras_hub/src/models/dinov3/dinov3_backbone.py,sha256=WDHipJSG10seRzYG_hARifF52wqhj9enkhuZ6mgJmjw,10511
 keras_hub/src/models/dinov3/dinov3_image_converter.py,sha256=_oHDcI2CoxjbSLxLfkK1zEPcf4Goy0S66igmrXt58cQ,342
 keras_hub/src/models/dinov3/dinov3_layers.py,sha256=w5K2btblrgrULqzPQdbvtkyR5Px2UZkqcZQ7jq2K3Uk,37169
-keras_hub/src/models/dinov3/dinov3_presets.py,sha256=AXXdrgrs9WBrsGlac0TgWV0DIPnvKdlxD3kUhbii1sk,114
+keras_hub/src/models/dinov3/dinov3_presets.py,sha256=oAAhMFbBMPmhtoDj3DMZz9zAG1DVSrf-xw0czoPwOEc,3148
 keras_hub/src/models/distil_bert/__init__.py,sha256=3Z0w-Mt3aOR0u9RGzjHQ7B3J3qBF2pGjupDGQ9yyzoc,303
 keras_hub/src/models/distil_bert/distil_bert_backbone.py,sha256=rnAf_GokB3wAeJwVZtgUKQO_bKJIa8RavhL_ykTJpNw,6440
 keras_hub/src/models/distil_bert/distil_bert_masked_lm.py,sha256=axeZd5UcxFr3_Q8H4yG10CINh93wbcyjlPLauqe5N9E,4289
@@ -203,7 +203,7 @@ keras_hub/src/models/electra/electra_backbone.py,sha256=h-QuFxACBvbMktkyGV2pIgn6
 keras_hub/src/models/electra/electra_presets.py,sha256=6f0WAYtDx5To4gvi6btN8I8y7yfc9ANchTHRKgCyIkg,2697
 keras_hub/src/models/electra/electra_tokenizer.py,sha256=Ll_EW-14i-OZr6appQEt5ceMUCeEadF4yPJHMwaRfVs,2729
 keras_hub/src/models/esm/__init__.py,sha256=_IlazeBwHkpetmLIZz3fFzC8CFcSnBRVQvw9nes4TN8,239
-keras_hub/src/models/esm/esm_attention.py,sha256=T21MVs9QDUe_8a53mcW3dJvJfaNZOg5lkMdxGhQdmFQ,3327
+keras_hub/src/models/esm/esm_attention.py,sha256=w74krK1xcsECGLS7v5MfABeW_4n5mexaqhosw6oGPnw,3729
 keras_hub/src/models/esm/esm_backbone.py,sha256=ADIpeiYz16fw1PNvx2tX-51HsZ_AjR2wGLkXZHErWBg,8696
 keras_hub/src/models/esm/esm_classifier.py,sha256=35-_3U725JhzspQAO_4ZkTJ0Tuy0XKMVkSrpmFz2CaE,6049
 keras_hub/src/models/esm/esm_classifier_preprocessor.py,sha256=TXjGH8ttElEsfBLOMLrxP24uPCYVS78iCrnpsGwurII,5532
@@ -238,7 +238,7 @@ keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=2kI2vSZvTia5
 keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
 keras_hub/src/models/gemma/gemma_attention.py,sha256=wmU5FgQu1Ajg-KHKVXTLHWH7pXqN4_zVJTCp_FXMcAs,10095
 keras_hub/src/models/gemma/gemma_backbone.py,sha256=pAAVaVKB6nlA0PncVnFXvNgJV7SeZy_ko2AxoIs0jF0,13364
-keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
+keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=5sOXph25bfwlz-o4MYuWU1BN8yTcj5l0R-x6y0sFams,17315
 keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
 keras_hub/src/models/gemma/gemma_decoder_block.py,sha256=f5UsRO-VNsKJfm_WHVJWK4UahhzYm3sKprJ8jjr-zm4,7628
 keras_hub/src/models/gemma/gemma_presets.py,sha256=wAH7mjz9tbQqqdwajU2dilGytnWK1qc-aTIVLtjpTWg,8263
@@ -248,17 +248,17 @@ keras_hub/src/models/gemma3/__init__.py,sha256=oPFadkdK5DRLD6sYx83iTetY5daWuSzmJ
 keras_hub/src/models/gemma3/gemma3_attention.py,sha256=u3RNI8dva5lzzqFNTAe9996s87cNJ_GEWc9BIJD337Q,15473
 keras_hub/src/models/gemma3/gemma3_backbone.py,sha256=HdWDRuF9MMwIzNVZEd1j53ILzptskvCxFiO__nfVQYU,16686
 keras_hub/src/models/gemma3/gemma3_causal_lm.py,sha256=U3C9TWlIz8VefAxQ0wJ6bDz18wqHBie8B26Ub_nFZs4,13843
-keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=vjt4N-zr0Eb5kvkOR-WUgskDTNe64L_6tYnhyNb6xaE,29601
+keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=_gvKPoXqNXpXcsfc8L29wW50MToHIr2D-4Q6MNVfBU0,29790
 keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=CYwYazqwakLNfhOLBl_8Q2TVZcMcOxMtiZtuVlk_hoo,11470
 keras_hub/src/models/gemma3/gemma3_image_converter.py,sha256=czi5JrTyKiK0nFzvonviBIX8jjvLHqvGNA9RyheB31k,536
 keras_hub/src/models/gemma3/gemma3_interleave_embeddings.py,sha256=CfYdudk5En9iU6vEnrcrEWIztloD1r8VzF2extqAhAM,4616
 keras_hub/src/models/gemma3/gemma3_presets.py,sha256=1GZSwsGRA19RllhZPR-kFjH5y9A6308V3TYfqHAnXUw,7744
-keras_hub/src/models/gemma3/gemma3_tokenizer.py,sha256=ZaBclFIwzJkSXDuZMBQLHUKV8RWEdZ_dsJMvMcc3qXw,3215
+keras_hub/src/models/gemma3/gemma3_tokenizer.py,sha256=7ZrWgSnXTnxbJVGdHPrz2rSJZhOkii0cxd5zvx4pY-0,3730
 keras_hub/src/models/gemma3/gemma3_vision_encoder.py,sha256=7XI0oBjIfJItV5w90t5bWb3C2KzjhvDnIC7wjIq4Cns,20850
 keras_hub/src/models/gemma3/rms_normalization.py,sha256=fku-JEo2sNy-ytX7ySD1sRzdhRAPmYex_z8oFk1NiG8,833
 keras_hub/src/models/gpt2/__init__.py,sha256=_hqeljpBkW8DLABy4nKBzJxXUh29WIEW27obmDCiH5Q,245
 keras_hub/src/models/gpt2/gpt2_backbone.py,sha256=H1LgDd-bavrWtdCavdI519qlaruE2Jj5H3-SMc-5d14,6961
-keras_hub/src/models/gpt2/gpt2_causal_lm.py,sha256=ynAcvh0-WUmwMN7vgflau4LH4YRFLf986OYRZ3M2Znk,16765
+keras_hub/src/models/gpt2/gpt2_causal_lm.py,sha256=i2veiYijPTsr6RqH5pR93uDw7rU9otQttdFBuVUxA80,17380
 keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py,sha256=3AD1LBFJ-u6bDdrwKa1LbINlEblZkhwB2sMJx-XEUZk,2992
 keras_hub/src/models/gpt2/gpt2_preprocessor.py,sha256=eYMIXw8Oebsr14GhqBh1CEhbLbIK3WnLUxaXj25fFpQ,3179
 keras_hub/src/models/gpt2/gpt2_presets.py,sha256=1mflR1dVuEwFfNe3Fkra6vt7DrjmkAckjyP-LclNLFc,1897
@@ -397,7 +397,7 @@ keras_hub/src/models/qwen3/qwen3_causal_lm.py,sha256=cn_4WFVxhlOArtIGAaqkNzIz9Rx
 keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py,sha256=H4g-bgvuhAUnDwjJovydK16Kes38ZFZWPvflrgHqZis,458
 keras_hub/src/models/qwen3/qwen3_decoder.py,sha256=68s9jQj53zFmXE4-SGXKYHu546fXOyi9LUbnKk-HGYY,11595
 keras_hub/src/models/qwen3/qwen3_layernorm.py,sha256=EJxjf7Pr6ufPQnNeuYQxkExzPjPk4PQxqMsoBeSEkDo,1073
-keras_hub/src/models/qwen3/qwen3_presets.py,sha256=eAqRbjLyRTSXcN-jnGHqoCHejKm2gmt8_zL4EPoE-JA,2518
+keras_hub/src/models/qwen3/qwen3_presets.py,sha256=3ml8rh9dmHCt_TP85xSmkoOxEd5cx5UJ1ts1yB5tTLo,3922
 keras_hub/src/models/qwen3/qwen3_tokenizer.py,sha256=LmPtg0vprMchDvYfTj8m5PraXI2QS3-YgdIIpIm5iAs,1448
 keras_hub/src/models/qwen3_moe/__init__.py,sha256=0jp5BHZ8O8cCrp4g6VWWDUwB5_fSDXvCVCSf6Q0UB6o,273
 keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py,sha256=rZnzWA-cAhuWSuHSJfrNqf5_Cu0PNEe7PKbPNbhJdeM,13355
@@ -476,10 +476,12 @@ keras_hub/src/models/siglip/siglip_presets.py,sha256=jtIQrNcq14othG1QgwBEfozEmoH
 keras_hub/src/models/siglip/siglip_text_encoder.py,sha256=xOVvzyQHLX9ne30y4ussar99gNMXPXHYKlkbCX_On2Y,5380
 keras_hub/src/models/siglip/siglip_tokenizer.py,sha256=j_67JbIHJDRk-CbiemG2dgAO6lp3_0_JdnfroZ90G18,2579
 keras_hub/src/models/siglip/siglip_vision_encoder.py,sha256=CaNaFq5thBC3TUXXOf2qknk5vWsauM20ZoaDPYRnXcs,5927
+keras_hub/src/models/smollm3/__init__.py,sha256=_svUTRF_mnFqe2cPP03CASKbDes6FvI76y_3TEHaxes,263
 keras_hub/src/models/smollm3/smollm3_backbone.py,sha256=9e8ydwy7X0stVEWgIJYt69vt6JYSCiYpM73w6oLxyoQ,7949
 keras_hub/src/models/smollm3/smollm3_causal_lm.py,sha256=PWn2zPu0YS3uRvmjksmXKXpxehl8lvEHAXaO0u7nweM,12641
 keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py,sha256=gbj7IhDbgA30AM80uG6BeI1yZmGd5yQ2VEaPWWyS9M4,3121
 keras_hub/src/models/smollm3/smollm3_layers.py,sha256=lR33IynX-1G42L3hNzOBnnIx-INOzJguSQDAwIPaSIQ,26632
+keras_hub/src/models/smollm3/smollm3_presets.py,sha256=-DRT6uUEa3b7KFpgUmAToh-kXKfyGiNrxAMz-0R8R6E,499
 keras_hub/src/models/smollm3/smollm3_tokenizer.py,sha256=evOVM8pgZUkWLoXAwWiYRSNNFZ7KBv1WtFdLqpHdCQU,1877
 keras_hub/src/models/smollm3/smollm3_utils.py,sha256=zAqtZTv1G--k-Dbjvk886OcmsuIxyYicRiUQXcpyud0,1904
 keras_hub/src/models/stable_diffusion_3/__init__.py,sha256=ZKYQuaRObyhKq8GVAHmoRvlXp6FpU8ChvutVCHyXKuc,343
@@ -607,6 +609,7 @@ keras_hub/src/utils/transformers/convert_dinov3.py,sha256=rZqowTASKSAQQ1HrwlD9_t
 keras_hub/src/utils/transformers/convert_distilbert.py,sha256=SlfIRhSRk5c1ir2HGiDPiXa5XdOId_DbcnZO9lbwyZ8,6498
 keras_hub/src/utils/transformers/convert_esm.py,sha256=rOgGnNY37ZbYnoVC3L-Y-yGGAxTRmYtQV0nJoandH2Y,6214
 keras_hub/src/utils/transformers/convert_gemma.py,sha256=ElCgwBpSN5Q7rV5PJawTsoytPzs5ZjuwoY60YAe8y_A,6533
+keras_hub/src/utils/transformers/convert_gemma3.py,sha256=TKdZh40_xHWXd9vdp7TY2EnDFXMJ8pOWoUsmFwx5EgE,14680
 keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1watPVpIBF8ujS8pGbBOWc,5703
 keras_hub/src/utils/transformers/convert_llama3.py,sha256=DjVUyQbl4AV-h8VqSIzmxiCd7cYOKIJTYoLM__NtyY0,6413
 keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
@@ -619,13 +622,13 @@ keras_hub/src/utils/transformers/convert_qwen_moe.py,sha256=a7R28aln-PdAcNuKAXdr
 keras_hub/src/utils/transformers/convert_smollm3.py,sha256=V2vWES85YSNXNx39I8OwAcOvSpb9KxUscrDr7ra-LPA,5281
 keras_hub/src/utils/transformers/convert_t5gemma.py,sha256=DPOwd61UhjspKuCsk3_EaNvSADGP_f8KLcZARHYVk5Y,9490
 keras_hub/src/utils/transformers/convert_vit.py,sha256=YAmXh519ecSgEO5B4g-aEQg1Bb_6ifFafLMqDTfLn_c,5259
-keras_hub/src/utils/transformers/preset_loader.py,sha256=alzuIEhDI6gLpEw05wPJVbOJ2LhwmLB_s7JhDqkb4ec,5364
+keras_hub/src/utils/transformers/preset_loader.py,sha256=h9Oaodlyri25Ipb44C5zvMJ15m9IRxrykYjxsFSJWto,5795
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
 keras_hub/src/utils/transformers/export/gemma.py,sha256=xX_vfQwvFZ_-lQX4kgMNOGKL7fL_1yk7QyGYV2Qyly4,4699
 keras_hub/src/utils/transformers/export/hf_exporter.py,sha256=Qk52c6LIA2eMHUNY9Vy4STJSpnhLMdJ_t-3ljqhSr4k,5081
 keras_hub/tokenizers/__init__.py,sha256=XFOxDmM1Mz9TxiE8ICZK_-yTTyRFywUUiVwRIzz2QZ8,4770
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub_nightly-0.24.0.dev202511090424.dist-info/METADATA,sha256=sQwmYQhujdR3LE7rPYvUYCmmx0jOFzEMRXOv3QqnASA,7395
-keras_hub_nightly-0.24.0.dev202511090424.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-keras_hub_nightly-0.24.0.dev202511090424.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
-keras_hub_nightly-0.24.0.dev202511090424.dist-info/RECORD,,
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/METADATA,sha256=qqz7talwW8gweE-PoKMGj24qb4yQhFmC6Yk2ObRdJdw,7395
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/RECORD,,