keras-hub-nightly 0.24.0.dev202511090424__py3-none-any.whl → 0.24.0.dev202512090431__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/src/models/causal_lm.py +22 -0
- keras_hub/src/models/dinov3/dinov3_presets.py +90 -1
- keras_hub/src/models/esm/esm_attention.py +11 -4
- keras_hub/src/models/gemma/gemma_causal_lm.py +16 -0
- keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py +8 -3
- keras_hub/src/models/gemma3/gemma3_tokenizer.py +20 -8
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +17 -0
- keras_hub/src/models/masked_lm.py +22 -0
- keras_hub/src/models/qwen3/qwen3_presets.py +36 -0
- keras_hub/src/models/smollm3/__init__.py +5 -0
- keras_hub/src/models/smollm3/smollm3_presets.py +16 -0
- keras_hub/src/utils/transformers/convert_gemma3.py +353 -0
- keras_hub/src/utils/transformers/preset_loader.py +9 -0
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.24.0.dev202511090424.dist-info → keras_hub_nightly-0.24.0.dev202512090431.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.24.0.dev202511090424.dist-info → keras_hub_nightly-0.24.0.dev202512090431.dist-info}/RECORD +18 -15
- {keras_hub_nightly-0.24.0.dev202511090424.dist-info → keras_hub_nightly-0.24.0.dev202512090431.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.24.0.dev202511090424.dist-info → keras_hub_nightly-0.24.0.dev202512090431.dist-info}/top_level.txt +0 -0
keras_hub/src/models/causal_lm.py

@@ -429,3 +429,25 @@ class CausalLM(Task):
         super()._post_quantize(mode, **kwargs)
         # Reset the compiled generate function.
         self.generate_function = None
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        # Check for standard backbone structure.
+        if not hasattr(backbone, "transformer_layers"):
+            return None
+
+        # Check for embedding.
+        embedding = getattr(backbone, "token_embedding", None)
+        if embedding is None:
+            embedding = getattr(backbone, "embedding", None)
+
+        if embedding is None:
+            return None
+
+        return {
+            "pre_block_layers": [embedding],
+            "sequential_blocks": backbone.transformer_layers,
+        }
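The structure returned above feeds GPTQ-style post-training quantization, which runs the embedding (and any other pre-block layers) once and then walks the transformer blocks sequentially. The sketch below is a hypothetical consumer of that contract, not the actual Keras quantization driver; `describe_gptq_structure` and the commented-out preset call are illustrative only.

import keras_hub

def describe_gptq_structure(task):
    # `get_quantization_layer_structure` returns None for unsupported modes
    # or for backbones without the standard layout.
    structure = task.get_quantization_layer_structure("gptq")
    if structure is None:
        print("No GPTQ-compatible layer structure exposed.")
        return
    # Layers that run once before the transformer stack (e.g. embeddings).
    for layer in structure["pre_block_layers"]:
        print("pre-block layer:", layer.name)
    # Blocks a GPTQ pass would quantize one at a time, in order.
    for block in structure["sequential_blocks"]:
        print("sequential block:", block.name)

# Example (downloads weights):
# causal_lm = keras_hub.models.GPT2CausalLM.from_preset("gpt2_base_en")
# describe_gptq_structure(causal_lm)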
keras_hub/src/models/dinov3/dinov3_presets.py

@@ -1,4 +1,93 @@
 """DINOV3 model preset configurations."""
 
 # Metadata for loading pretrained model weights.
-backbone_presets = {
+backbone_presets = {
+    "dinov3_vit_small_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (small-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 21_600_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_small_lvd1689m/1",
+    },
+    "dinov3_vit_small_plus_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (small-plus-sized model) trained on "
+                "LVD-1689M using DINOv3."
+            ),
+            "params": 29_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_small_plus_lvd1689m/1",
+    },
+    "dinov3_vit_base_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (base-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 86_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_base_lvd1689m/1",
+    },
+    "dinov3_vit_large_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (large-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 300_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_large_lvd1689m/1",
+    },
+    "dinov3_vit_huge_plus_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (huge-plus-sized model) trained on "
+                "LVD-1689M using DINOv3."
+            ),
+            "params": 840_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_huge_plus_lvd1689m/1",
+    },
+    "dinov3_vit_7b_lvd1689m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (7B-sized model) trained on LVD-1689M "
+                "using DINOv3."
+            ),
+            "params": 6_700_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_7b_lvd1689m/1",
+    },
+    "dinov3_vit_large_sat493m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (large-sized model) trained on SAT-493M "
+                "using DINOv3."
+            ),
+            "params": 300_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_large_sat493m/1",
+    },
+    "dinov3_vit_7b_sat493m": {
+        "metadata": {
+            "description": (
+                "Vision Transformer (7B-sized model) trained on SAT-493M "
+                "using DINOv3."
+            ),
+            "params": 6_700_000_000,
+            "path": "dinov3",
+        },
+        "kaggle_handle": "kaggle://keras/dinov3/keras/dinov3_vit_7b_sat493m/1",
+    },
+}
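These entries register Kaggle handles under `backbone_presets`, so the new DINOv3 checkpoints become addressable by preset name. A minimal sketch, assuming the listed handles are live and this nightly build is installed:

import keras_hub

# Resolves the preset name against the registered Kaggle handle and
# instantiates the DINOv3 backbone with pretrained weights.
backbone = keras_hub.models.Backbone.from_preset("dinov3_vit_small_lvd1689m")
backbone.summary()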
keras_hub/src/models/esm/esm_attention.py

@@ -14,7 +14,8 @@ class ESMRotaryEmbedding(RotaryEmbedding):
         inv_freq = self.scaling_factor / (
             self.max_wavelength ** (ops.arange(0, dim, 2, dtype=x.dtype) / dim)
         )
-
+        # Use ops.shape for dynamic shape compatibility with TFLite
+        t = ops.arange(ops.shape(x)[position], dtype=x.dtype)
         freqs = ops.outer(t, inv_freq)
         emb = ops.concatenate((freqs, freqs), axis=-1)
 
@@ -32,11 +33,17 @@ class ESMRotaryEmbedding(RotaryEmbedding):
 
     def rotate_half(self, x):
         x1, x2 = ops.split(x, 2, -1)
-
+        # Avoid `ops.concatenate` to prevent XLA compilation issues on JAX
+        # backend. Use stack + reshape approach from base RotaryEmbedding.
+        half_rot_x = ops.stack((-x2, x1), axis=-2)
+        half_rot_x = ops.reshape(half_rot_x, ops.shape(x))
+        return half_rot_x
 
     def apply_rotary_pos_emb(self, x, cos, sin):
-
-
+        # Use ops.shape for dynamic shape compatibility with TFLite
+        seq_len = ops.shape(x)[1]
+        cos = cos[:, :seq_len, :, :]
+        sin = sin[:, :seq_len, :, :]
 
         return (x * cos) + (self.rotate_half(x) * sin)
 
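The rewritten `rotate_half` replaces the usual concatenation of `(-x2, x1)` with a stack along a new second-to-last axis followed by a reshape back to the input shape; the two formulations produce identical values. A standalone NumPy check (NumPy is used here only for illustration):

import numpy as np

x = np.arange(24, dtype="float32").reshape(2, 3, 4)  # (batch, seq, dim)
x1, x2 = np.split(x, 2, axis=-1)

# Original formulation: concatenate the negated second half with the first.
via_concat = np.concatenate((-x2, x1), axis=-1)

# Rewritten formulation: stack on a new second-to-last axis, then flatten it
# back into the feature axis.
via_stack = np.reshape(np.stack((-x2, x1), axis=-2), x.shape)

assert np.array_equal(via_concat, via_stack)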
keras_hub/src/models/gemma/gemma_causal_lm.py

@@ -431,3 +431,19 @@ class GemmaCausalLM(CausalLM):
         )
         per_token_loss = per_token_loss_fn(target_ids, logits)
         return per_token_loss
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        # Wrap embedding + scaling
+        backbone = self.backbone
+        inputs = keras.Input(shape=(None,), dtype="int32")
+        x = backbone.token_embedding(inputs)
+        x = x * ops.cast(ops.sqrt(backbone.hidden_dim), x.dtype)
+        pre_processor = keras.Model(inputs=inputs, outputs=x)
+
+        return {
+            "pre_block_layers": [pre_processor],
+            "sequential_blocks": backbone.transformer_layers,
+        }
keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py

@@ -283,9 +283,14 @@ class Gemma3CausalLMPreprocessor(CausalLMPreprocessor):
         # is `None`.
         self.text_only_model = self.image_converter is None
 
-
-
-
+        if self.text_only_model:
+            self.image_placeholder = None
+            self.start_of_image_token = None
+            self.end_of_image_token = None
+        else:
+            self.image_placeholder = self.tokenizer.image_placeholder
+            self.start_of_image_token = self.tokenizer.start_of_image_token
+            self.end_of_image_token = self.tokenizer.end_of_image_token
 
     def build(self, input_shape):
         # Defer packer creation to `build()` so that we can be sure tokenizer
keras_hub/src/models/gemma3/gemma3_tokenizer.py

@@ -77,20 +77,32 @@ class Gemma3Tokenizer(SentencePieceTokenizer):
 
     backbone_cls = Gemma3Backbone
 
-    def __init__(self, proto, **kwargs):
+    def __init__(self, proto, has_vision_tokens=True, **kwargs):
         # Add special tokens.
 
+        self.has_vision_tokens = has_vision_tokens
         # The usual tokens.
         self._add_special_token("<bos>", "start_token")
         self._add_special_token("<eos>", "end_token")
         self._add_special_token("<pad>", "pad_token")
 
-
-
-
-
-
-
-
+        if has_vision_tokens:
+            # Image placeholder token.
+            self._add_special_token("<img>", "image_placeholder")
+            # Some tokens which are used in the preprocessor.
+            # We need to keep them
+            # here so that the preprocessor works with tf.data.
+            self._add_special_token("<start_of_image>", "start_of_image_token")
+            self._add_special_token("<end_of_image>", "end_of_image_token")
+        else:
+            # For text-only, skip assigning token IDs or set to -1
+            self.start_of_image_token_id = -1
+            self.image_placeholder_token_id = -1
+            self.end_of_image_token_id = -1
 
         super().__init__(proto=proto, **kwargs)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({"has_vision_tokens": self.has_vision_tokens})
+        return config
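The new `has_vision_tokens` flag lets the same tokenizer class serve text-only Gemma 3 vocabularies that lack the image special tokens, and the `get_config` override means the flag survives serialization. A minimal sketch, assuming `proto_path` points to a Gemma 3 SentencePiece model file on disk (the path is a placeholder, not a bundled asset):

import keras_hub

proto_path = "path/to/gemma3_tokenizer.model"  # placeholder path

# Text-only construction: image-related token IDs are set to -1 instead of
# being looked up in the vocabulary.
tokenizer = keras_hub.tokenizers.Gemma3Tokenizer(
    proto=proto_path, has_vision_tokens=False
)

# The flag is now part of the serialized config.
config = tokenizer.get_config()
assert config["has_vision_tokens"] is False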
keras_hub/src/models/gpt2/gpt2_causal_lm.py

@@ -420,3 +420,20 @@ class GPT2CausalLM(CausalLM):
         )
         per_token_loss = per_token_loss_fn(target_ids, logits)
         return per_token_loss
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        token_ids = keras.Input(shape=(None,), dtype="int32")
+        tokens = backbone.token_embedding(token_ids)
+        positions = backbone.position_embedding(tokens)
+        x = backbone.embeddings_add((tokens, positions))
+        x = backbone.embeddings_dropout(x)
+        pre_processor = keras.Model(inputs=token_ids, outputs=x)
+
+        return {
+            "pre_block_layers": [pre_processor],
+            "sequential_blocks": backbone.transformer_layers,
+        }
keras_hub/src/models/masked_lm.py

@@ -84,3 +84,25 @@ class MaskedLM(Task):
             weighted_metrics=weighted_metrics,
             **kwargs,
         )
+
+    def get_quantization_layer_structure(self, mode):
+        if mode != "gptq":
+            return None
+
+        backbone = self.backbone
+        # Check for standard backbone structure.
+        if not hasattr(backbone, "transformer_layers"):
+            return None
+
+        # Check for embedding.
+        embedding = getattr(backbone, "token_embedding", None)
+        if embedding is None:
+            embedding = getattr(backbone, "embedding", None)
+
+        if embedding is None:
+            return None
+
+        return {
+            "pre_block_layers": [embedding],
+            "sequential_blocks": backbone.transformer_layers,
+        }
keras_hub/src/models/qwen3/qwen3_presets.py

@@ -70,4 +70,40 @@ backbone_presets = {
         },
         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_32b_en/1",
     },
+    "qwen3_embedding_0.6b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 1024."
+            ),
+            "params": 595776512,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_0.6b_en/1",
+    },
+    "qwen3_embedding_4b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 2560."
+            ),
+            "params": 4021774336,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_4b_en/1",
+    },
+    "qwen3_embedding_8b_en": {
+        "metadata": {
+            "description": (
+                "This text embedding model features a 32k context length and "
+                "offers flexible, user-defined embedding dimensions that can "
+                "range from 32 to 4096."
+            ),
+            "params": 8188515328,
+            "path": "qwen3",
+        },
+        "kaggle_handle": "kaggle://keras/qwen-3-embedding/keras/qwen3_embedding_8b_en/1",
+    },
 }
keras_hub/src/models/smollm3/smollm3_presets.py

@@ -0,0 +1,16 @@
+"""SmolLM3 model preset configurations."""
+
+backbone_presets = {
+    "smollm3_3b_en": {
+        "metadata": {
+            "description": (
+                "Dense decoder-only model has 3 billion total parameters, "
+                "built on 36 layers and utilizes 16 query and "
+                "4 key/value attention heads."
+            ),
+            "params": 3075100928,
+            "path": "smollm3",
+        },
+        "kaggle_handle": "kaggle://keras/smollm3/keras/smollm3_3b_en/1",
+    },
+}
keras_hub/src/utils/transformers/convert_gemma3.py

@@ -0,0 +1,353 @@
+import numpy as np
+from sentencepiece import SentencePieceProcessor
+
+from keras_hub.src.models.gemma3.gemma3_backbone import Gemma3Backbone
+from keras_hub.src.models.gemma3.gemma3_vision_encoder import (
+    Gemma3VisionEncoder,
+)
+from keras_hub.src.utils.preset_utils import get_file
+from keras_hub.src.utils.preset_utils import load_json
+
+backbone_cls = Gemma3Backbone
+
+
+def load_image_converter_config(preset, transformers_config):
+    if "vision_config" in transformers_config:
+        preprocessor_config = load_json(preset, "preprocessor_config.json")
+        mean = preprocessor_config["image_mean"]
+        std = preprocessor_config["image_std"]
+        rescale_factor = preprocessor_config["rescale_factor"]
+        offset = [(-m / s) for m, s in zip(mean, std)]
+        scale = [(s * rescale_factor) for s in std]
+        image_size = transformers_config["vision_config"].get("image_size", 224)
+        return {
+            "image_size": (image_size, image_size),
+            "scale": scale,
+            "offset": offset,
+        }
+    else:
+        return None
+
+
+def convert_backbone_config(transformers_config):
+    if transformers_config["model_type"] == "gemma3_text":
+        image_size = None
+        vision_encoder = None
+        transformer_config = transformers_config
+    else:
+        vision_config = transformers_config["vision_config"]
+        image_size = vision_config["image_size"]
+        vision_encoder_config = {
+            "image_size": image_size,
+            "patch_size": vision_config["patch_size"],
+            "num_heads": vision_config["num_attention_heads"],
+            "hidden_dim": vision_config["hidden_size"],
+            "num_layers": vision_config["num_hidden_layers"],
+            "intermediate_dim": vision_config["intermediate_size"],
+            "output_dim": 2560,
+            "pool_size": 4,
+            "layer_norm_epsilon": vision_config.get("layer_norm_eps", 1e-6),
+        }
+        vision_encoder = Gemma3VisionEncoder(**vision_encoder_config)
+        transformer_config = transformers_config["text_config"]
+
+    if "rope_parameters" in transformer_config:
+        rope_global_config = transformer_config.get("rope_parameters", {}).get(
+            "full_attention"
+        )
+    elif "rope_scaling" in transformer_config:
+        rope_global_config = transformer_config["rope_scaling"]
+    else:
+        rope_global_config = {}
+    return {
+        "vocabulary_size": transformer_config.get(
+            "vocab_size", 262144 if vision_encoder is None else 262208
+        ),
+        "image_size": image_size,
+        "num_layers": transformer_config["num_hidden_layers"],
+        "num_query_heads": transformer_config.get("num_attention_heads", 8),
+        "num_key_value_heads": transformer_config.get("num_key_value_heads", 4),
+        "hidden_dim": transformer_config["hidden_size"],
+        "intermediate_dim": transformer_config["intermediate_size"],
+        "head_dim": transformer_config["head_dim"],
+        "use_post_ffw_norm": True,
+        "use_post_attention_norm": True,
+        "attention_logit_softcap": transformer_config.get(
+            "attn_logit_softcap", None
+        ),
+        "final_logit_softcap": transformer_config.get(
+            "final_logit_softcap", None
+        ),
+        "use_sliding_window_attention": True,
+        "query_head_dim_normalize": True,
+        "sliding_window_size": transformer_config["sliding_window"],
+        "local_rope_scaling_factor": 1.0,
+        "global_rope_scaling_factor": (
+            rope_global_config.get("factor", 1.0) if rope_global_config else 1.0
+        ),
+        "layer_norm_epsilon": transformer_config.get("rms_norm_eps", 1e-6),
+        "use_bidirectional_attention": transformer_config.get(
+            "use_bidirectional_attention", False
+        ),
+        "vision_encoder": vision_encoder,
+    }
+
+
+def convert_weights(backbone, loader, transformers_config):
+    if transformers_config["model_type"] == "gemma3_text":
+        prefix = "model"
+    else:
+        prefix = "language_model.model"
+
+    loader.port_weight(
+        keras_variable=backbone.get_layer("token_embedding").embeddings,
+        hf_weight_key=f"{prefix}.embed_tokens.weight",
+    )
+
+    def transpose(x, shape):
+        return np.transpose(x)
+
+    vision_encoder = backbone.vision_encoder
+    if vision_encoder is not None:
+        image_encoder = vision_encoder.get_layer("image_encoder")
+
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.patch_embedding.kernel,
+            hf_weight_key="vision_tower.vision_model.embeddings.patch_embedding.weight",
+            hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.patch_embedding.bias,
+            hf_weight_key="vision_tower.vision_model.embeddings.patch_embedding.bias",
+        )
+
+        loader.port_weight(
+            keras_variable=image_encoder.vision_embeddings.position_embedding.embeddings,
+            hf_weight_key="vision_tower.vision_model.embeddings.position_embedding.weight",
+        )
+
+        for i in range(image_encoder.num_layers):
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_1.gamma,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm1.weight",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_1.beta,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm1.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[
+                    i
+                ].attn.query_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.q_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.query_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.q_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.key_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.k_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.key_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.k_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[
+                    i
+                ].attn.value_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.v_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.value_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.v_proj.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.out_proj.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.out_proj.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].attn.out_proj.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.self_attn.out_proj.bias",
+            )
+
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_2.gamma,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm2.weight",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].layer_norm_2.beta,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.layer_norm2.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_1.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc1.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_1.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc1.bias",
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_2.kernel,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc2.weight",
+                hook_fn=transpose,
+            )
+            loader.port_weight(
+                keras_variable=image_encoder.resblocks[i].mlp_dense_2.bias,
+                hf_weight_key=f"vision_tower.vision_model.encoder.layers.{i}.mlp.fc2.bias",
+            )
+
+        loader.port_weight(
+            keras_variable=image_encoder.encoder_layer_norm.gamma,
+            hf_weight_key="vision_tower.vision_model.post_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=image_encoder.encoder_layer_norm.beta,
+            hf_weight_key="vision_tower.vision_model.post_layernorm.bias",
+        )
+
+        loader.port_weight(
+            keras_variable=vision_encoder.get_layer(
+                "vision_output_encoder"
+            ).vision_soft_embedding_norm.scale,
+            hf_weight_key="multi_modal_projector.mm_soft_emb_norm.weight",
+        )
+
+        loader.port_weight(
+            keras_variable=vision_encoder.get_layer(
+                "vision_output_encoder"
+            ).vision_input_projection.kernel,
+            hf_weight_key="multi_modal_projector.mm_input_projection_weight",
+        )
+
+    for i in range(backbone.num_layers):
+        decoder_layer = backbone.get_layer(f"decoder_block_{i}")
+
+        loader.port_weight(
+            keras_variable=decoder_layer.pre_attention_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.input_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.post_attention_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.post_attention_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.pre_ffw_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.pre_feedforward_layernorm.weight",
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.post_ffw_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.post_feedforward_layernorm.weight",
+        )
+
+        # Attention layers
+
+        ## Query
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.query_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.q_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.query_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.q_norm.weight",
+        )
+        ## Key
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.key_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.k_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.key_norm.scale,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.k_norm.weight",
+        )
+        ## Value
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.value_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.v_proj.weight",
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[0], keras_shape[2], keras_shape[1]),
+                ),
+                axes=(0, 2, 1),
+            ),
+        )
+        ## Output
+        loader.port_weight(
+            keras_variable=decoder_layer.attention.output_dense.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.self_attn.o_proj.weight",
+            # rearrange_patterns="c (a b) -> a b c",
+            # rearrange_dims={"a": backbone.num_query_heads},
+            hook_fn=lambda hf_tensor, keras_shape: np.transpose(
+                np.reshape(
+                    hf_tensor,
+                    (keras_shape[2], keras_shape[0], keras_shape[1]),
+                ),
+                axes=(1, 2, 0),
+            ),
+        )
+
+        # MLP layers
+        loader.port_weight(
+            keras_variable=decoder_layer.gating_ffw.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.gate_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.gating_ffw_2.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.up_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+        loader.port_weight(
+            keras_variable=decoder_layer.ffw_linear.kernel,
+            hf_weight_key=f"{prefix}.layers.{i}.mlp.down_proj.weight",
+            # rearrange_patterns="b a -> a b",
+            hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+        )
+
+    # Final normalization layer
+    loader.port_weight(
+        keras_variable=backbone.get_layer("final_normalization").scale,
+        hf_weight_key=f"{prefix}.norm.weight",
+    )
+
+    return backbone
+
+
+def convert_tokenizer(cls, preset, **kwargs):
+    proto = get_file(preset, "tokenizer.model")
+    sp = SentencePieceProcessor()
+    if isinstance(proto, bytes):
+        sp.LoadFromSerializedProto(proto)
+    else:
+        sp.load(proto)
+
+    has_vision_tokens = (
+        sp.PieceToId("<start_of_image>") != sp.unk_id()
+        and sp.PieceToId("<img>") != sp.unk_id()
+        and sp.PieceToId("<end_of_image>") != sp.unk_id()
+    )
+
+    return cls(proto, has_vision_tokens=has_vision_tokens, **kwargs)
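In `convert_weights`, the attention projection hooks undo Hugging Face's fused 2-D layout: reading the hook, a `(num_heads * head_dim, hidden_dim)` matrix is reshaped to `(num_heads, head_dim, hidden_dim)` and then the last two axes are swapped to match the three-axis Keras kernel shape passed in as `keras_shape`. The standalone NumPy illustration below uses made-up dimensions; only the reshape/transpose pattern is taken from the converter.

import numpy as np

# Illustrative dimensions only (not taken from a real checkpoint).
num_heads, head_dim, hidden_dim = 4, 8, 16

# HF stores the query projection as one (num_heads * head_dim, hidden_dim)
# matrix.
hf_q_proj = np.random.rand(num_heads * head_dim, hidden_dim).astype("float32")

# Target Keras kernel shape, as implied by the hook's use of keras_shape.
keras_shape = (num_heads, hidden_dim, head_dim)

# Same transformation as the hook_fn in the converter.
keras_kernel = np.transpose(
    np.reshape(hf_q_proj, (keras_shape[0], keras_shape[2], keras_shape[1])),
    axes=(0, 2, 1),
)
assert keras_kernel.shape == keras_shape

# Sanity check: head 0's rows in the HF matrix become head 0's columns in the
# Keras kernel.
assert np.array_equal(keras_kernel[0], hf_q_proj[:head_dim].T)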
keras_hub/src/utils/transformers/preset_loader.py

@@ -12,6 +12,7 @@ from keras_hub.src.utils.transformers import convert_dinov3
 from keras_hub.src.utils.transformers import convert_distilbert
 from keras_hub.src.utils.transformers import convert_esm
 from keras_hub.src.utils.transformers import convert_gemma
+from keras_hub.src.utils.transformers import convert_gemma3
 from keras_hub.src.utils.transformers import convert_gpt2
 from keras_hub.src.utils.transformers import convert_llama3
 from keras_hub.src.utils.transformers import convert_mistral
@@ -49,6 +50,8 @@ class TransformersPresetLoader(PresetLoader):
             self.converter = convert_esm
         elif model_type in ("gemma", "gemma2"):
             self.converter = convert_gemma
+        elif model_type in ("gemma3", "gemma3_text"):
+            self.converter = convert_gemma3
         elif model_type == "gpt2":
             self.converter = convert_gpt2
         elif model_type == "llama":
@@ -115,5 +118,11 @@ class TransformersPresetLoader(PresetLoader):
         return self.converter.convert_tokenizer(cls, self.preset, **kwargs)
 
     def load_image_converter(self, cls, **kwargs):
+        if hasattr(self.converter, "load_image_converter_config"):
+            config = self.converter.load_image_converter_config(
+                self.preset, self.config
+            )
+            if config is not None:
+                return cls(**{**config, **kwargs})
         # TODO: set image size for pali gemma checkpoints.
         return None
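With the converter registered for the `gemma3` and `gemma3_text` model types, the Transformers preset loader can route Hugging Face Gemma 3 checkpoints through `convert_gemma3`. A sketch of the expected end-to-end usage, assuming access to the (gated) Hugging Face repository; the repo id below is illustrative.

import keras_hub

# Routes through TransformersPresetLoader, which now selects convert_gemma3
# for checkpoints whose config reports model_type "gemma3" or "gemma3_text".
causal_lm = keras_hub.models.Gemma3CausalLM.from_preset(
    "hf://google/gemma-3-1b-it"
)
print(causal_lm.generate("The Keras ecosystem is", max_length=30))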
keras_hub/src/version.py CHANGED
{keras_hub_nightly-0.24.0.dev202511090424.dist-info → keras_hub_nightly-0.24.0.dev202512090431.dist-info}/RECORD

@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=XGYkwfBVZiPw5ZjSV5S_n3FnkPf06yYNzxZjXMhiX70,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=
+keras_hub/src/version.py,sha256=RwtmvWeWT3DYRT6L3ZzcKzMVzwl83RHUWlQ1KmiI2nM,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -44,7 +44,7 @@ keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 keras_hub/src/models/audio_to_text.py,sha256=XoOjXtKBX6K1fz-zOXcdVo3FpjuxCMnJZh2LQcYXb_0,2726
 keras_hub/src/models/audio_to_text_preprocessor.py,sha256=GS-WWyJ6aSsPRxi_0bxvxA00h2mT2FEwSdAoQXAUYVI,3249
 keras_hub/src/models/backbone.py,sha256=BdqPsne7lIITIxn6jY6AN4vZ-Rc9VnpqTxvVNR3CS7M,12210
-keras_hub/src/models/causal_lm.py,sha256=
+keras_hub/src/models/causal_lm.py,sha256=3WVFo9WAd4ZDu-62X98JMoUw1bqfTlVR0lQPb6vmr1g,18989
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=nxl-sfmCfkfl6JmVRASa878QbaZUgWSA6Jdu48x4-dY,7155
 keras_hub/src/models/depth_estimator.py,sha256=JR7wtunOPrfEoDkLspoZnL2ItWhZFDeAxxw2vue5QLs,8992
 keras_hub/src/models/depth_estimator_preprocessor.py,sha256=2iE8NAUyiD2AvjZwNoXKUaOUogcE1fRzTNXLQ75GZpQ,2822
@@ -55,7 +55,7 @@ keras_hub/src/models/image_segmenter.py,sha256=C1bzIO59pG58iist5GLn_qnlotDpcAVxP
 keras_hub/src/models/image_segmenter_preprocessor.py,sha256=d7I2Hk0SKWyKpjRS6WYccmh_CYQBpWoj0JF5RRrU6rw,3748
 keras_hub/src/models/image_to_image.py,sha256=nblRd-16n5_JxKIH6IJU7bHTFRGxyCpKUilg6VjWuek,16933
 keras_hub/src/models/inpaint.py,sha256=oqdj0Q9dNG54g6sNQ5foto8saPd5Sx8kYZuHCZPBqrY,20995
-keras_hub/src/models/masked_lm.py,sha256=
+keras_hub/src/models/masked_lm.py,sha256=EPDHfMxyO_pciKuLooR6T5XYRcfvsqgfDXumXkA41No,4221
 keras_hub/src/models/masked_lm_preprocessor.py,sha256=g8vrnyYwqdnSw5xppROM1Gzo_jmMWKYZoQCsKdfrFKk,5656
 keras_hub/src/models/object_detector.py,sha256=oAK42fFBKuN0G_WM-DhygFkgQ0KsEwU_ZiU4umHywqc,3757
 keras_hub/src/models/object_detector_preprocessor.py,sha256=kOSVRNFAg-UjtrCEVBdHXUFyJy7kQtlVuGnZ1aLEfOk,2664
@@ -180,7 +180,7 @@ keras_hub/src/models/dinov3/__init__.py,sha256=AI7vTZJBG6Ygb48o6pXtHzxKk0Rek3p7-
 keras_hub/src/models/dinov3/dinov3_backbone.py,sha256=WDHipJSG10seRzYG_hARifF52wqhj9enkhuZ6mgJmjw,10511
 keras_hub/src/models/dinov3/dinov3_image_converter.py,sha256=_oHDcI2CoxjbSLxLfkK1zEPcf4Goy0S66igmrXt58cQ,342
 keras_hub/src/models/dinov3/dinov3_layers.py,sha256=w5K2btblrgrULqzPQdbvtkyR5Px2UZkqcZQ7jq2K3Uk,37169
-keras_hub/src/models/dinov3/dinov3_presets.py,sha256=
+keras_hub/src/models/dinov3/dinov3_presets.py,sha256=oAAhMFbBMPmhtoDj3DMZz9zAG1DVSrf-xw0czoPwOEc,3148
 keras_hub/src/models/distil_bert/__init__.py,sha256=3Z0w-Mt3aOR0u9RGzjHQ7B3J3qBF2pGjupDGQ9yyzoc,303
 keras_hub/src/models/distil_bert/distil_bert_backbone.py,sha256=rnAf_GokB3wAeJwVZtgUKQO_bKJIa8RavhL_ykTJpNw,6440
 keras_hub/src/models/distil_bert/distil_bert_masked_lm.py,sha256=axeZd5UcxFr3_Q8H4yG10CINh93wbcyjlPLauqe5N9E,4289
@@ -203,7 +203,7 @@ keras_hub/src/models/electra/electra_backbone.py,sha256=h-QuFxACBvbMktkyGV2pIgn6
 keras_hub/src/models/electra/electra_presets.py,sha256=6f0WAYtDx5To4gvi6btN8I8y7yfc9ANchTHRKgCyIkg,2697
 keras_hub/src/models/electra/electra_tokenizer.py,sha256=Ll_EW-14i-OZr6appQEt5ceMUCeEadF4yPJHMwaRfVs,2729
 keras_hub/src/models/esm/__init__.py,sha256=_IlazeBwHkpetmLIZz3fFzC8CFcSnBRVQvw9nes4TN8,239
-keras_hub/src/models/esm/esm_attention.py,sha256=
+keras_hub/src/models/esm/esm_attention.py,sha256=w74krK1xcsECGLS7v5MfABeW_4n5mexaqhosw6oGPnw,3729
 keras_hub/src/models/esm/esm_backbone.py,sha256=ADIpeiYz16fw1PNvx2tX-51HsZ_AjR2wGLkXZHErWBg,8696
 keras_hub/src/models/esm/esm_classifier.py,sha256=35-_3U725JhzspQAO_4ZkTJ0Tuy0XKMVkSrpmFz2CaE,6049
 keras_hub/src/models/esm/esm_classifier_preprocessor.py,sha256=TXjGH8ttElEsfBLOMLrxP24uPCYVS78iCrnpsGwurII,5532
@@ -238,7 +238,7 @@ keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=2kI2vSZvTia5
 keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
 keras_hub/src/models/gemma/gemma_attention.py,sha256=wmU5FgQu1Ajg-KHKVXTLHWH7pXqN4_zVJTCp_FXMcAs,10095
 keras_hub/src/models/gemma/gemma_backbone.py,sha256=pAAVaVKB6nlA0PncVnFXvNgJV7SeZy_ko2AxoIs0jF0,13364
-keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=
+keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=5sOXph25bfwlz-o4MYuWU1BN8yTcj5l0R-x6y0sFams,17315
 keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
 keras_hub/src/models/gemma/gemma_decoder_block.py,sha256=f5UsRO-VNsKJfm_WHVJWK4UahhzYm3sKprJ8jjr-zm4,7628
 keras_hub/src/models/gemma/gemma_presets.py,sha256=wAH7mjz9tbQqqdwajU2dilGytnWK1qc-aTIVLtjpTWg,8263
@@ -248,17 +248,17 @@ keras_hub/src/models/gemma3/__init__.py,sha256=oPFadkdK5DRLD6sYx83iTetY5daWuSzmJ
 keras_hub/src/models/gemma3/gemma3_attention.py,sha256=u3RNI8dva5lzzqFNTAe9996s87cNJ_GEWc9BIJD337Q,15473
 keras_hub/src/models/gemma3/gemma3_backbone.py,sha256=HdWDRuF9MMwIzNVZEd1j53ILzptskvCxFiO__nfVQYU,16686
 keras_hub/src/models/gemma3/gemma3_causal_lm.py,sha256=U3C9TWlIz8VefAxQ0wJ6bDz18wqHBie8B26Ub_nFZs4,13843
-keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=
+keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=_gvKPoXqNXpXcsfc8L29wW50MToHIr2D-4Q6MNVfBU0,29790
 keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=CYwYazqwakLNfhOLBl_8Q2TVZcMcOxMtiZtuVlk_hoo,11470
 keras_hub/src/models/gemma3/gemma3_image_converter.py,sha256=czi5JrTyKiK0nFzvonviBIX8jjvLHqvGNA9RyheB31k,536
 keras_hub/src/models/gemma3/gemma3_interleave_embeddings.py,sha256=CfYdudk5En9iU6vEnrcrEWIztloD1r8VzF2extqAhAM,4616
 keras_hub/src/models/gemma3/gemma3_presets.py,sha256=1GZSwsGRA19RllhZPR-kFjH5y9A6308V3TYfqHAnXUw,7744
-keras_hub/src/models/gemma3/gemma3_tokenizer.py,sha256=
+keras_hub/src/models/gemma3/gemma3_tokenizer.py,sha256=7ZrWgSnXTnxbJVGdHPrz2rSJZhOkii0cxd5zvx4pY-0,3730
 keras_hub/src/models/gemma3/gemma3_vision_encoder.py,sha256=7XI0oBjIfJItV5w90t5bWb3C2KzjhvDnIC7wjIq4Cns,20850
 keras_hub/src/models/gemma3/rms_normalization.py,sha256=fku-JEo2sNy-ytX7ySD1sRzdhRAPmYex_z8oFk1NiG8,833
 keras_hub/src/models/gpt2/__init__.py,sha256=_hqeljpBkW8DLABy4nKBzJxXUh29WIEW27obmDCiH5Q,245
 keras_hub/src/models/gpt2/gpt2_backbone.py,sha256=H1LgDd-bavrWtdCavdI519qlaruE2Jj5H3-SMc-5d14,6961
-keras_hub/src/models/gpt2/gpt2_causal_lm.py,sha256=
+keras_hub/src/models/gpt2/gpt2_causal_lm.py,sha256=i2veiYijPTsr6RqH5pR93uDw7rU9otQttdFBuVUxA80,17380
 keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py,sha256=3AD1LBFJ-u6bDdrwKa1LbINlEblZkhwB2sMJx-XEUZk,2992
 keras_hub/src/models/gpt2/gpt2_preprocessor.py,sha256=eYMIXw8Oebsr14GhqBh1CEhbLbIK3WnLUxaXj25fFpQ,3179
 keras_hub/src/models/gpt2/gpt2_presets.py,sha256=1mflR1dVuEwFfNe3Fkra6vt7DrjmkAckjyP-LclNLFc,1897
@@ -397,7 +397,7 @@ keras_hub/src/models/qwen3/qwen3_causal_lm.py,sha256=cn_4WFVxhlOArtIGAaqkNzIz9Rx
 keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py,sha256=H4g-bgvuhAUnDwjJovydK16Kes38ZFZWPvflrgHqZis,458
 keras_hub/src/models/qwen3/qwen3_decoder.py,sha256=68s9jQj53zFmXE4-SGXKYHu546fXOyi9LUbnKk-HGYY,11595
 keras_hub/src/models/qwen3/qwen3_layernorm.py,sha256=EJxjf7Pr6ufPQnNeuYQxkExzPjPk4PQxqMsoBeSEkDo,1073
-keras_hub/src/models/qwen3/qwen3_presets.py,sha256=
+keras_hub/src/models/qwen3/qwen3_presets.py,sha256=3ml8rh9dmHCt_TP85xSmkoOxEd5cx5UJ1ts1yB5tTLo,3922
 keras_hub/src/models/qwen3/qwen3_tokenizer.py,sha256=LmPtg0vprMchDvYfTj8m5PraXI2QS3-YgdIIpIm5iAs,1448
 keras_hub/src/models/qwen3_moe/__init__.py,sha256=0jp5BHZ8O8cCrp4g6VWWDUwB5_fSDXvCVCSf6Q0UB6o,273
 keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py,sha256=rZnzWA-cAhuWSuHSJfrNqf5_Cu0PNEe7PKbPNbhJdeM,13355
@@ -476,10 +476,12 @@ keras_hub/src/models/siglip/siglip_presets.py,sha256=jtIQrNcq14othG1QgwBEfozEmoH
 keras_hub/src/models/siglip/siglip_text_encoder.py,sha256=xOVvzyQHLX9ne30y4ussar99gNMXPXHYKlkbCX_On2Y,5380
 keras_hub/src/models/siglip/siglip_tokenizer.py,sha256=j_67JbIHJDRk-CbiemG2dgAO6lp3_0_JdnfroZ90G18,2579
 keras_hub/src/models/siglip/siglip_vision_encoder.py,sha256=CaNaFq5thBC3TUXXOf2qknk5vWsauM20ZoaDPYRnXcs,5927
+keras_hub/src/models/smollm3/__init__.py,sha256=_svUTRF_mnFqe2cPP03CASKbDes6FvI76y_3TEHaxes,263
 keras_hub/src/models/smollm3/smollm3_backbone.py,sha256=9e8ydwy7X0stVEWgIJYt69vt6JYSCiYpM73w6oLxyoQ,7949
 keras_hub/src/models/smollm3/smollm3_causal_lm.py,sha256=PWn2zPu0YS3uRvmjksmXKXpxehl8lvEHAXaO0u7nweM,12641
 keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py,sha256=gbj7IhDbgA30AM80uG6BeI1yZmGd5yQ2VEaPWWyS9M4,3121
 keras_hub/src/models/smollm3/smollm3_layers.py,sha256=lR33IynX-1G42L3hNzOBnnIx-INOzJguSQDAwIPaSIQ,26632
+keras_hub/src/models/smollm3/smollm3_presets.py,sha256=-DRT6uUEa3b7KFpgUmAToh-kXKfyGiNrxAMz-0R8R6E,499
 keras_hub/src/models/smollm3/smollm3_tokenizer.py,sha256=evOVM8pgZUkWLoXAwWiYRSNNFZ7KBv1WtFdLqpHdCQU,1877
 keras_hub/src/models/smollm3/smollm3_utils.py,sha256=zAqtZTv1G--k-Dbjvk886OcmsuIxyYicRiUQXcpyud0,1904
 keras_hub/src/models/stable_diffusion_3/__init__.py,sha256=ZKYQuaRObyhKq8GVAHmoRvlXp6FpU8ChvutVCHyXKuc,343
@@ -607,6 +609,7 @@ keras_hub/src/utils/transformers/convert_dinov3.py,sha256=rZqowTASKSAQQ1HrwlD9_t
 keras_hub/src/utils/transformers/convert_distilbert.py,sha256=SlfIRhSRk5c1ir2HGiDPiXa5XdOId_DbcnZO9lbwyZ8,6498
 keras_hub/src/utils/transformers/convert_esm.py,sha256=rOgGnNY37ZbYnoVC3L-Y-yGGAxTRmYtQV0nJoandH2Y,6214
 keras_hub/src/utils/transformers/convert_gemma.py,sha256=ElCgwBpSN5Q7rV5PJawTsoytPzs5ZjuwoY60YAe8y_A,6533
+keras_hub/src/utils/transformers/convert_gemma3.py,sha256=TKdZh40_xHWXd9vdp7TY2EnDFXMJ8pOWoUsmFwx5EgE,14680
 keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1watPVpIBF8ujS8pGbBOWc,5703
 keras_hub/src/utils/transformers/convert_llama3.py,sha256=DjVUyQbl4AV-h8VqSIzmxiCd7cYOKIJTYoLM__NtyY0,6413
 keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
@@ -619,13 +622,13 @@ keras_hub/src/utils/transformers/convert_qwen_moe.py,sha256=a7R28aln-PdAcNuKAXdr
 keras_hub/src/utils/transformers/convert_smollm3.py,sha256=V2vWES85YSNXNx39I8OwAcOvSpb9KxUscrDr7ra-LPA,5281
 keras_hub/src/utils/transformers/convert_t5gemma.py,sha256=DPOwd61UhjspKuCsk3_EaNvSADGP_f8KLcZARHYVk5Y,9490
 keras_hub/src/utils/transformers/convert_vit.py,sha256=YAmXh519ecSgEO5B4g-aEQg1Bb_6ifFafLMqDTfLn_c,5259
-keras_hub/src/utils/transformers/preset_loader.py,sha256=
+keras_hub/src/utils/transformers/preset_loader.py,sha256=h9Oaodlyri25Ipb44C5zvMJ15m9IRxrykYjxsFSJWto,5795
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
 keras_hub/src/utils/transformers/export/gemma.py,sha256=xX_vfQwvFZ_-lQX4kgMNOGKL7fL_1yk7QyGYV2Qyly4,4699
 keras_hub/src/utils/transformers/export/hf_exporter.py,sha256=Qk52c6LIA2eMHUNY9Vy4STJSpnhLMdJ_t-3ljqhSr4k,5081
 keras_hub/tokenizers/__init__.py,sha256=XFOxDmM1Mz9TxiE8ICZK_-yTTyRFywUUiVwRIzz2QZ8,4770
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub_nightly-0.24.0.
-keras_hub_nightly-0.24.0.
-keras_hub_nightly-0.24.0.
-keras_hub_nightly-0.24.0.
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/METADATA,sha256=qqz7talwW8gweE-PoKMGj24qb4yQhFmC6Yk2ObRdJdw,7395
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.24.0.dev202512090431.dist-info/RECORD,,

File without changes