keras-hub-nightly 0.23.0.dev202510150419__py3-none-any.whl → 0.24.0.dev202511080419__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +3 -0
- keras_hub/models/__init__.py +27 -0
- keras_hub/src/layers/modeling/reversible_embedding.py +6 -0
- keras_hub/src/models/causal_lm.py +5 -0
- keras_hub/src/models/depth_anything/depth_anything_presets.py +38 -1
- keras_hub/src/models/dinov2/dinov2_layers.py +3 -1
- keras_hub/src/models/dinov3/__init__.py +5 -0
- keras_hub/src/models/dinov3/dinov3_backbone.py +263 -0
- keras_hub/src/models/dinov3/dinov3_image_converter.py +8 -0
- keras_hub/src/models/dinov3/dinov3_layers.py +1013 -0
- keras_hub/src/models/dinov3/dinov3_presets.py +4 -0
- keras_hub/src/models/gemma/gemma_presets.py +22 -0
- keras_hub/src/models/gemma3/gemma3_presets.py +39 -0
- keras_hub/src/models/image_to_image.py +5 -0
- keras_hub/src/models/inpaint.py +5 -0
- keras_hub/src/models/mobilenetv5/__init__.py +9 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py +15 -0
- keras_hub/src/models/parseq/__init__.py +5 -0
- keras_hub/src/models/parseq/parseq_presets.py +15 -0
- keras_hub/src/models/siglip/siglip_presets.py +15 -0
- keras_hub/src/models/smollm3/smollm3_backbone.py +211 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm.py +310 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py +84 -0
- keras_hub/src/models/smollm3/smollm3_layers.py +757 -0
- keras_hub/src/models/smollm3/smollm3_tokenizer.py +60 -0
- keras_hub/src/models/smollm3/smollm3_utils.py +56 -0
- keras_hub/src/models/text_to_image.py +5 -0
- keras_hub/src/utils/tensor_utils.py +3 -1
- keras_hub/src/utils/transformers/convert_dinov3.py +106 -0
- keras_hub/src/utils/transformers/convert_smollm3.py +139 -0
- keras_hub/src/utils/transformers/preset_loader.py +6 -0
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +6 -0
- {keras_hub_nightly-0.23.0.dev202510150419.dist-info → keras_hub_nightly-0.24.0.dev202511080419.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.23.0.dev202510150419.dist-info → keras_hub_nightly-0.24.0.dev202511080419.dist-info}/RECORD +37 -22
- {keras_hub_nightly-0.23.0.dev202510150419.dist-info → keras_hub_nightly-0.24.0.dev202511080419.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.23.0.dev202510150419.dist-info → keras_hub_nightly-0.24.0.dev202511080419.dist-info}/top_level.txt +0 -0
keras_hub/src/models/gemma/gemma_presets.py
CHANGED

@@ -206,4 +206,26 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/vaultgemma/keras/vault_gemma_1b_en/2",
     },
+    "c2s_scale_gemma_2_2b_en": {
+        "metadata": {
+            "description": (
+                "A 2 billion parameter, single-cell biology-aware model "
+                "built on the Gemma-2 architecture."
+            ),
+            "params": 2614341888,
+            "path": "gemma",
+        },
+        "kaggle_handle": "kaggle://keras/cell2sentence/keras/c2s_scale_gemma_2_2b_en/1",
+    },
+    "c2s_scale_gemma_2_27b_en": {
+        "metadata": {
+            "description": (
+                "A 27 billion parameter, single-cell biology-aware model "
+                "built on the Gemma-2 architecture."
+            ),
+            "params": 27227128320,
+            "path": "gemma",
+        },
+        "kaggle_handle": "kaggle://keras/cell2sentence/keras/c2s_scale_gemma_2_27b_en/1",
+    },
 }
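Once a nightly with this change is installed, the new Cell2Sentence checkpoints should be loadable by preset name through the standard `from_preset` flow. A minimal sketch (the preset name comes from the hunk above; availability of the published Kaggle asset is assumed):

```python
import keras_hub

# Assumes the "c2s_scale_gemma_2_2b_en" preset registered above is published.
causal_lm = keras_hub.models.GemmaCausalLM.from_preset("c2s_scale_gemma_2_2b_en")
causal_lm.generate("Cell type annotation:", max_length=64)
```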
keras_hub/src/models/gemma3/gemma3_presets.py
CHANGED

@@ -181,4 +181,43 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_270m/4",
     },
+    "medgemma_instruct_4b": {
+        "metadata": {
+            "description": (
+                "A 4 billion parameter model based on Gemma 3. "
+                "This model is trained for performance on medical text "
+                "and image comprehension and is optimized for medical "
+                "applications that involve a text generation component."
+            ),
+            "params": 4300079472,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_instruct_4b/1",
+    },
+    "medgemma_instruct_27b": {
+        "metadata": {
+            "description": (
+                "A 27 billion parameter model based on Gemma 3. "
+                "This model is trained for performance on medical text "
+                "and image comprehension and is optimized for medical "
+                "applications that involve a text generation component."
+            ),
+            "params": 27432406640,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_instruct_27b/1",
+    },
+    "medgemma_instruct_27b_text": {
+        "metadata": {
+            "description": (
+                "A 27 billion parameter text-only model based on Gemma 3. "
+                "This model is trained for performance on medical text "
+                "comprehension and is optimized for medical applications "
+                "that involve a text generation component."
+            ),
+            "params": 27009002240,
+            "path": "gemma3",
+        },
+        "kaggle_handle": "kaggle://keras/medgemma/keras/medgemma_instruct_27b_text/1",
+    },
 }
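These MedGemma presets plug into the same Gemma 3 task classes as the existing presets. A minimal sketch using the text-only variant (assumes the Kaggle asset is published):

```python
import keras_hub

# Assumes the "medgemma_instruct_27b_text" preset registered above is published.
causal_lm = keras_hub.models.Gemma3CausalLM.from_preset("medgemma_instruct_27b_text")
causal_lm.generate("Summarize the typical contraindications for", max_length=64)
```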
keras_hub/src/models/image_to_image.py
CHANGED

@@ -415,3 +415,8 @@ class ImageToImage(Task):
         # Image-to-image.
         outputs = [generate(*x) for x in inputs]
         return self._normalize_generate_outputs(outputs, input_is_scalar)
+
+    def _post_quantize(self, mode, **kwargs):
+        super()._post_quantize(mode, **kwargs)
+        # Reset the compiled generate function.
+        self.generate_function = None
keras_hub/src/models/inpaint.py
CHANGED
@@ -518,3 +518,8 @@ class Inpaint(Task):
         # Inpaint.
         outputs = [generate(*x) for x in inputs]
         return self._normalize_generate_outputs(outputs, input_is_scalar)
+
+    def _post_quantize(self, mode, **kwargs):
+        super()._post_quantize(mode, **kwargs)
+        # Reset the compiled generate function.
+        self.generate_function = None
keras_hub/src/models/mobilenetv5/__init__.py
ADDED

@@ -0,0 +1,9 @@
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_presets import (
+    backbone_presets,
+)
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, MobileNetV5Backbone)
keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py
ADDED

@@ -0,0 +1,15 @@
+"""MobileNetV5 preset configurations."""
+
+backbone_presets = {
+    "mobilenetv5_300m_enc_gemma3n": {
+        "metadata": {
+            "description": (
+                "Lightweight 300M-parameter convolutional vision encoder used "
+                "as the image backbone for Gemma 3n"
+            ),
+            "params": 294_284_096,
+            "path": "mobilenetv5",
+        },
+        "kaggle_handle": "kaggle://keras/mobilenetv5/keras/mobilenetv5_300m_enc_gemma3n/1",
+    }
+}
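Since the `__init__.py` hunk above registers this preset dict against `MobileNetV5Backbone`, the encoder should be loadable by name; a minimal sketch (assumes the Kaggle asset is published):

```python
import keras_hub

backbone = keras_hub.models.MobileNetV5Backbone.from_preset(
    "mobilenetv5_300m_enc_gemma3n"
)
backbone.summary()
```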
keras_hub/src/models/parseq/parseq_presets.py
ADDED

@@ -0,0 +1,15 @@
+"""PARSeq preset configurations."""
+
+backbone_presets = {
+    "parseq": {
+        "metadata": {
+            "description": (
+                "Permuted autoregressive sequence (PARSeq) base "
+                "model for scene text recognition"
+            ),
+            "params": 23_832_671,
+            "path": "parseq",
+        },
+        "kaggle_handle": "kaggle://keras/parseq/keras/parseq/1",
+    }
+}
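The PARSeq preset follows the same registration pattern (via `keras_hub/src/models/parseq/__init__.py` in the file list above); a sketch using the generic backbone loader, assuming the preset is registered and published:

```python
import keras_hub

backbone = keras_hub.models.Backbone.from_preset("parseq")
print(backbone.count_params())  # ~23.8M per the metadata above
```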
keras_hub/src/models/siglip/siglip_presets.py
CHANGED

@@ -321,4 +321,19 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/siglip/keras/siglip2_so400m_patch16_512/1",
     },
+    "medsiglip_900m_448": {
+        "metadata": {
+            "description": (
+                "A 900 million parameter variant of SigLIP trained to encode "
+                "medical images and text into a common embedding space. "
+                "MedSigLIP contains a vision encoder and a text encoder, and "
+                "supports 448x448 image resolution with up to 64 text tokens."
+            ),
+            "params": 878301426,
+            "official_name": "SigLIP2",
+            "path": "siglip",
+            "model_card": "https://huggingface.co/google/medsiglip-448#medsiglip-model-card",
+        },
+        "kaggle_handle": "kaggle://keras/medsiglip/keras/medsiglip_900m_448/1",
+    },
 }
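As with the other preset additions, the MedSigLIP checkpoint should resolve by name; a minimal sketch (assumes the Kaggle asset is published):

```python
import keras_hub

backbone = keras_hub.models.SigLIPBackbone.from_preset("medsiglip_900m_448")
```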
keras_hub/src/models/smollm3/smollm3_backbone.py
ADDED

@@ -0,0 +1,211 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.modeling.reversible_embedding import (
+    ReversibleEmbedding,
+)
+from keras_hub.src.models.backbone import Backbone
+from keras_hub.src.models.smollm3.smollm3_layers import SmolLM3DecoderLayer
+
+
+@keras_hub_export(
+    [
+        "keras_hub.models.SmolLM3Backbone",
+        "keras_hub.models.SmolLMBackbone",
+    ]
+)
+class SmolLM3Backbone(Backbone):
+    """SmolLM3 core network with hyperparameters.
+
+    This network implements a Transformer-based decoder network,
+    SmolLM3, as described in the SmolLM3 model architecture.
+    It includes the embedding lookups and transformer layers.
+
+    The default constructor gives a fully customizable, randomly initialized
+    SmolLM3 model with any number of layers, heads, and embedding
+    dimensions. To load preset architectures and weights, use the `from_preset`
+    constructor.
+
+    Args:
+        vocabulary_size: int. The size of the token vocabulary.
+        hidden_dim: int. The size of the transformer hidden state at the end
+            of each transformer layer.
+        intermediate_dim: int. The output dimension of the first Dense layer in
+            the MLP network of each transformer layer.
+        num_layers: int. The number of transformer layers.
+        num_attention_heads: int. The number of attention heads for each
+            transformer layer.
+        num_key_value_heads: int. The number of key-value heads for grouped
+            query attention in each transformer layer.
+        attention_bias: bool. Whether to use bias in the query, key, value, and
+            output projection layers in the attention blocks.
+        attention_dropout: float. Dropout probability for the attention layers.
+        rope_layer_enabled_list: list of bool. List indicating whether RoPE
+            (Rotary Position Embedding) is enabled for each layer. Typically,
+            some layers may disable RoPE for architectural variations.
+        layer_types: list of str. List of layer types for each transformer
+            layer (e.g., "attention" or other custom types).
+        mlp_bias: bool. Whether to use bias in the MLP (feedforward) layers.
+        layer_norm_epsilon: float. Epsilon value for layer normalization layers
+            to prevent division by zero.
+        max_position_embeddings: int. The maximum sequence length that this
+            model might ever be used with.
+        rope_theta: float. The base period of the RoPE embeddings.
+        partial_rotary_factor: float. The percentage of hidden dimensions to
+            rotate in RoPE. A value of 1.0 rotates all dimensions, while values
+            less than 1.0 only rotate a subset.
+
+    Examples:
+
+    ```python
+    input_data = {
+        "token_ids": np.ones(shape=(1, 12), dtype="int32"),
+        "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]),
+    }
+
+    # Pretrained SmolLM3 decoder.
+    model = keras_hub.models.SmolLM3Backbone.from_preset(
+        "hf://HuggingFaceTB/SmolLM3-3B"
+    )
+    model(input_data)
+
+    # Randomly initialized SmolLM3 decoder with custom config.
+    model = keras_hub.models.SmolLM3Backbone(
+        vocabulary_size=49152,
+        hidden_dim=576,
+        intermediate_dim=1536,
+        num_layers=30,
+        num_attention_heads=9,
+        num_key_value_heads=3,
+        attention_bias=False,
+        attention_dropout=0.0,
+        rope_layer_enabled_list=[True] * 30,
+        layer_types=["attention"] * 30,
+        mlp_bias=False,
+        layer_norm_epsilon=1e-5,
+        max_position_embeddings=2048,
+        rope_theta=10000.0,
+        partial_rotary_factor=1.0,
+    )
+    model(input_data)
+    ```
+    """
+
+    def __init__(
+        self,
+        vocabulary_size,
+        hidden_dim,
+        intermediate_dim,
+        num_layers,
+        num_attention_heads,
+        num_key_value_heads,
+        attention_bias,
+        attention_dropout,
+        rope_layer_enabled_list,
+        layer_types,
+        mlp_bias,
+        layer_norm_epsilon,
+        max_position_embeddings,
+        rope_theta,
+        partial_rotary_factor,
+        **kwargs,
+    ):
+        # === Layers ===
+        self.token_embedding = ReversibleEmbedding(
+            input_dim=vocabulary_size,
+            output_dim=hidden_dim,
+            name="token_embedding",
+        )
+        self.transformer_layers = []
+        for i in range(num_layers):
+            layer = SmolLM3DecoderLayer(
+                hidden_size=hidden_dim,
+                num_attention_heads=num_attention_heads,
+                num_key_value_heads=num_key_value_heads,
+                attention_bias=attention_bias,
+                attention_dropout=attention_dropout,
+                rope_layer_enabled_list=rope_layer_enabled_list,
+                layer_types=layer_types,
+                layer_idx=i,
+                intermediate_size=intermediate_dim,
+                mlp_bias=mlp_bias,
+                layer_norm_epsilon=layer_norm_epsilon,
+                max_position_embeddings=max_position_embeddings,
+                rope_theta=rope_theta,
+                partial_rotary_factor=partial_rotary_factor,
+                name=f"transformer_layer_{i}",
+            )
+            self.transformer_layers.append(layer)
+
+        self.norm = keras.layers.RMSNormalization(
+            epsilon=layer_norm_epsilon,
+            name="sequence_output_layernorm",
+        )
+
+        # === Functional Model ===
+        token_id_input = keras.Input(
+            shape=(None,), dtype="int32", name="token_ids"
+        )
+
+        padding_mask_input = keras.Input(
+            shape=(None,), dtype="int32", name="padding_mask"
+        )
+
+        x = self.token_embedding(token_id_input)
+
+        for decoder_layer in self.transformer_layers:
+            x = decoder_layer(
+                x,
+                decoder_padding_mask=padding_mask_input,
+                **kwargs,
+            )
+
+        sequence_output = self.norm(x)
+        super().__init__(
+            inputs={
+                "token_ids": token_id_input,
+                "padding_mask": padding_mask_input,
+            },
+            outputs=sequence_output,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.vocabulary_size = vocabulary_size
+        self.hidden_dim = hidden_dim
+        self.intermediate_dim = intermediate_dim
+        self.num_layers = num_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.attention_bias = attention_bias
+        self.attention_dropout = attention_dropout
+        self.rope_layer_enabled_list = rope_layer_enabled_list
+        self.layer_types = layer_types
+        self.mlp_bias = mlp_bias
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.max_position_embeddings = max_position_embeddings
+        self.rope_theta = rope_theta
+        self.partial_rotary_factor = partial_rotary_factor
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "vocabulary_size": self.vocabulary_size,
+                "hidden_dim": self.hidden_dim,
+                "intermediate_dim": self.intermediate_dim,
+                "num_layers": self.num_layers,
+                "num_attention_heads": self.num_attention_heads,
+                "num_key_value_heads": self.num_key_value_heads,
+                "attention_bias": self.attention_bias,
+                "attention_dropout": self.attention_dropout,
+                "rope_layer_enabled_list": self.rope_layer_enabled_list,
+                "layer_types": self.layer_types,
+                "mlp_bias": self.mlp_bias,
+                "layer_norm_epsilon": self.layer_norm_epsilon,
+                "max_position_embeddings": self.max_position_embeddings,
+                "rope_theta": self.rope_theta,
+                "partial_rotary_factor": self.partial_rotary_factor,
+            }
+        )
+        return config
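Because `get_config` returns every constructor argument, the backbone should round-trip through Keras serialization; a sketch reusing the randomly initialized `model` from the docstring example above (illustrative only):

```python
import keras_hub

# Rebuild the backbone from its own config and spot-check one field.
config = model.get_config()
restored = keras_hub.models.SmolLM3Backbone.from_config(config)
assert restored.num_layers == model.num_layers
```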