keras-hub-nightly 0.21.0.dev202505200408__py3-none-any.whl → 0.21.0.dev202505230409__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/src/models/gemma/gemma_presets.py +10 -10
- keras_hub/src/models/gemma3/gemma3_presets.py +8 -8
- keras_hub/src/models/llama/llama_presets.py +3 -3
- keras_hub/src/models/llama3/llama3_presets.py +2 -2
- keras_hub/src/models/mistral/mistral_presets.py +3 -3
- keras_hub/src/models/mixtral/mixtral_attention.py +31 -42
- keras_hub/src/models/mixtral/mixtral_presets.py +2 -2
- keras_hub/src/models/moonshine/moonshine_presets.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +11 -11
- keras_hub/src/models/qwen/qwen_presets.py +6 -6
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py +7 -9
- keras_hub/src/models/qwen_moe/qwen_moe_presets.py +1 -1
- keras_hub/src/utils/transformers/convert_mixtral.py +1 -1
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.21.0.dev202505200408.dist-info → keras_hub_nightly-0.21.0.dev202505230409.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.21.0.dev202505200408.dist-info → keras_hub_nightly-0.21.0.dev202505230409.dist-info}/RECORD +18 -18
- {keras_hub_nightly-0.21.0.dev202505200408.dist-info → keras_hub_nightly-0.21.0.dev202505230409.dist-info}/WHEEL +1 -1
- {keras_hub_nightly-0.21.0.dev202505200408.dist-info → keras_hub_nightly-0.21.0.dev202505230409.dist-info}/top_level.txt +0 -0
@@ -61,7 +61,7 @@ backbone_presets = {
|
|
61
61
|
"params": 8537680896,
|
62
62
|
"path": "gemma",
|
63
63
|
},
|
64
|
-
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_7b_en/
|
64
|
+
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_7b_en/4",
|
65
65
|
},
|
66
66
|
"gemma_instruct_7b_en": {
|
67
67
|
"metadata": {
|
@@ -71,7 +71,7 @@ backbone_presets = {
|
|
71
71
|
"params": 8537680896,
|
72
72
|
"path": "gemma",
|
73
73
|
},
|
74
|
-
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_instruct_7b_en/
|
74
|
+
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_instruct_7b_en/4",
|
75
75
|
},
|
76
76
|
"gemma_1.1_instruct_7b_en": {
|
77
77
|
"metadata": {
|
@@ -82,7 +82,7 @@ backbone_presets = {
|
|
82
82
|
"params": 8537680896,
|
83
83
|
"path": "gemma",
|
84
84
|
},
|
85
|
-
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_1.1_instruct_7b_en/
|
85
|
+
"kaggle_handle": "kaggle://keras/gemma/keras/gemma_1.1_instruct_7b_en/5",
|
86
86
|
},
|
87
87
|
"code_gemma_7b_en": {
|
88
88
|
"metadata": {
|
@@ -94,7 +94,7 @@ backbone_presets = {
|
|
94
94
|
"params": 8537680896,
|
95
95
|
"path": "gemma",
|
96
96
|
},
|
97
|
-
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_7b_en/
|
97
|
+
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_7b_en/3",
|
98
98
|
},
|
99
99
|
"code_gemma_instruct_7b_en": {
|
100
100
|
"metadata": {
|
@@ -106,7 +106,7 @@ backbone_presets = {
|
|
106
106
|
"params": 8537680896,
|
107
107
|
"path": "gemma",
|
108
108
|
},
|
109
|
-
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_instruct_7b_en/
|
109
|
+
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_instruct_7b_en/3",
|
110
110
|
},
|
111
111
|
"code_gemma_1.1_instruct_7b_en": {
|
112
112
|
"metadata": {
|
@@ -118,7 +118,7 @@ backbone_presets = {
|
|
118
118
|
"params": 8537680896,
|
119
119
|
"path": "gemma",
|
120
120
|
},
|
121
|
-
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_1.1_instruct_7b_en/
|
121
|
+
"kaggle_handle": "kaggle://keras/codegemma/keras/code_gemma_1.1_instruct_7b_en/3",
|
122
122
|
},
|
123
123
|
"gemma2_2b_en": {
|
124
124
|
"metadata": {
|
@@ -144,7 +144,7 @@ backbone_presets = {
|
|
144
144
|
"params": 9241705984,
|
145
145
|
"path": "gemma",
|
146
146
|
},
|
147
|
-
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_9b_en/
|
147
|
+
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_9b_en/4",
|
148
148
|
},
|
149
149
|
"gemma2_instruct_9b_en": {
|
150
150
|
"metadata": {
|
@@ -154,7 +154,7 @@ backbone_presets = {
|
|
154
154
|
"params": 9241705984,
|
155
155
|
"path": "gemma",
|
156
156
|
},
|
157
|
-
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_9b_en/
|
157
|
+
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_9b_en/4",
|
158
158
|
},
|
159
159
|
"gemma2_27b_en": {
|
160
160
|
"metadata": {
|
@@ -162,7 +162,7 @@ backbone_presets = {
|
|
162
162
|
"params": 27227128320,
|
163
163
|
"path": "gemma",
|
164
164
|
},
|
165
|
-
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_27b_en/
|
165
|
+
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_27b_en/3",
|
166
166
|
},
|
167
167
|
"gemma2_instruct_27b_en": {
|
168
168
|
"metadata": {
|
@@ -172,7 +172,7 @@ backbone_presets = {
|
|
172
172
|
"params": 27227128320,
|
173
173
|
"path": "gemma",
|
174
174
|
},
|
175
|
-
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_27b_en/
|
175
|
+
"kaggle_handle": "kaggle://keras/gemma2/keras/gemma2_instruct_27b_en/3",
|
176
176
|
},
|
177
177
|
"shieldgemma_2b_en": {
|
178
178
|
"metadata": {
|
@@ -55,7 +55,7 @@ backbone_presets = {
|
|
55
55
|
"params": 11765788416,
|
56
56
|
"path": "gemma3",
|
57
57
|
},
|
58
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_12b_text/
|
58
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_12b_text/3",
|
59
59
|
},
|
60
60
|
"gemma3_instruct_12b_text": {
|
61
61
|
"metadata": {
|
@@ -66,7 +66,7 @@ backbone_presets = {
|
|
66
66
|
"params": 11765788416,
|
67
67
|
"path": "gemma3",
|
68
68
|
},
|
69
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_12b_text/
|
69
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_12b_text/3",
|
70
70
|
},
|
71
71
|
"gemma3_27b_text": {
|
72
72
|
"metadata": {
|
@@ -77,7 +77,7 @@ backbone_presets = {
|
|
77
77
|
"params": 27009002240,
|
78
78
|
"path": "gemma3",
|
79
79
|
},
|
80
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_27b_text/
|
80
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_27b_text/4",
|
81
81
|
},
|
82
82
|
"gemma3_instruct_27b_text": {
|
83
83
|
"metadata": {
|
@@ -88,7 +88,7 @@ backbone_presets = {
|
|
88
88
|
"params": 27009002240,
|
89
89
|
"path": "gemma3",
|
90
90
|
},
|
91
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_27b_text/
|
91
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_27b_text/3",
|
92
92
|
},
|
93
93
|
"gemma3_4b": {
|
94
94
|
"metadata": {
|
@@ -121,7 +121,7 @@ backbone_presets = {
|
|
121
121
|
"params": 12187079280,
|
122
122
|
"path": "gemma3",
|
123
123
|
},
|
124
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_12b/
|
124
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_12b/2",
|
125
125
|
},
|
126
126
|
"gemma3_instruct_12b": {
|
127
127
|
"metadata": {
|
@@ -132,7 +132,7 @@ backbone_presets = {
|
|
132
132
|
"params": 12187079280,
|
133
133
|
"path": "gemma3",
|
134
134
|
},
|
135
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_12b/
|
135
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_12b/2",
|
136
136
|
},
|
137
137
|
"gemma3_27b": {
|
138
138
|
"metadata": {
|
@@ -143,7 +143,7 @@ backbone_presets = {
|
|
143
143
|
"params": 27432062576,
|
144
144
|
"path": "gemma3",
|
145
145
|
},
|
146
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_27b/
|
146
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_27b/2",
|
147
147
|
},
|
148
148
|
"gemma3_instruct_27b": {
|
149
149
|
"metadata": {
|
@@ -154,6 +154,6 @@ backbone_presets = {
|
|
154
154
|
"params": 27432062576,
|
155
155
|
"path": "gemma3",
|
156
156
|
},
|
157
|
-
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_27b/
|
157
|
+
"kaggle_handle": "kaggle://keras/gemma3/keras/gemma3_instruct_27b/2",
|
158
158
|
},
|
159
159
|
}
|
@@ -8,7 +8,7 @@ backbone_presets = {
|
|
8
8
|
"params": 6738415616,
|
9
9
|
"path": "llama",
|
10
10
|
},
|
11
|
-
"kaggle_handle": "kaggle://keras/llama2/keras/llama2_7b_en/
|
11
|
+
"kaggle_handle": "kaggle://keras/llama2/keras/llama2_7b_en/3",
|
12
12
|
},
|
13
13
|
"llama2_7b_en_int8": {
|
14
14
|
"metadata": {
|
@@ -30,7 +30,7 @@ backbone_presets = {
|
|
30
30
|
"params": 6738415616,
|
31
31
|
"path": "llama",
|
32
32
|
},
|
33
|
-
"kaggle_handle": "kaggle://keras/llama2/keras/llama2_instruct_7b_en/
|
33
|
+
"kaggle_handle": "kaggle://keras/llama2/keras/llama2_instruct_7b_en/3",
|
34
34
|
},
|
35
35
|
"llama2_instruct_7b_en_int8": {
|
36
36
|
"metadata": {
|
@@ -52,6 +52,6 @@ backbone_presets = {
|
|
52
52
|
"params": 6738415616,
|
53
53
|
"path": "llama",
|
54
54
|
},
|
55
|
-
"kaggle_handle": "kaggle://keras/vicuna/keras/vicuna_1.5_7b_en/
|
55
|
+
"kaggle_handle": "kaggle://keras/vicuna/keras/vicuna_1.5_7b_en/3",
|
56
56
|
},
|
57
57
|
}
|
@@ -8,7 +8,7 @@ backbone_presets = {
|
|
8
8
|
"params": 8030261248,
|
9
9
|
"path": "llama3",
|
10
10
|
},
|
11
|
-
"kaggle_handle": "kaggle://keras/llama3/keras/llama3_8b_en/
|
11
|
+
"kaggle_handle": "kaggle://keras/llama3/keras/llama3_8b_en/5",
|
12
12
|
},
|
13
13
|
"llama3_8b_en_int8": {
|
14
14
|
"metadata": {
|
@@ -30,7 +30,7 @@ backbone_presets = {
|
|
30
30
|
"params": 8030261248,
|
31
31
|
"path": "llama3",
|
32
32
|
},
|
33
|
-
"kaggle_handle": "kaggle://keras/llama3/keras/llama3_instruct_8b_en/
|
33
|
+
"kaggle_handle": "kaggle://keras/llama3/keras/llama3_instruct_8b_en/5",
|
34
34
|
},
|
35
35
|
"llama3_instruct_8b_en_int8": {
|
36
36
|
"metadata": {
|
@@ -8,7 +8,7 @@ backbone_presets = {
|
|
8
8
|
"params": 7241732096,
|
9
9
|
"path": "mistral",
|
10
10
|
},
|
11
|
-
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_7b_en/
|
11
|
+
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_7b_en/8",
|
12
12
|
},
|
13
13
|
"mistral_instruct_7b_en": {
|
14
14
|
"metadata": {
|
@@ -16,7 +16,7 @@ backbone_presets = {
|
|
16
16
|
"params": 7241732096,
|
17
17
|
"path": "mistral",
|
18
18
|
},
|
19
|
-
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_instruct_7b_en/
|
19
|
+
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_instruct_7b_en/8",
|
20
20
|
},
|
21
21
|
"mistral_0.2_instruct_7b_en": {
|
22
22
|
"metadata": {
|
@@ -24,6 +24,6 @@ backbone_presets = {
|
|
24
24
|
"params": 7241732096,
|
25
25
|
"path": "mistral",
|
26
26
|
},
|
27
|
-
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_0.2_instruct_7b_en/
|
27
|
+
"kaggle_handle": "kaggle://keras/mistral/keras/mistral_0.2_instruct_7b_en/3",
|
28
28
|
},
|
29
29
|
}
|
@@ -27,19 +27,19 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
27
27
|
**kwargs,
|
28
28
|
):
|
29
29
|
super().__init__(**kwargs)
|
30
|
-
self.
|
31
|
-
self.
|
32
|
-
self.
|
33
|
-
self.
|
30
|
+
self.num_query_heads = num_query_heads
|
31
|
+
self.num_key_value_heads = num_key_value_heads
|
32
|
+
self.sliding_window = sliding_window
|
33
|
+
self.dropout = dropout
|
34
34
|
|
35
|
-
self.
|
36
|
-
self.
|
35
|
+
self.num_key_value_groups = num_query_heads // num_key_value_heads
|
36
|
+
self.rope_max_wavelength = rope_max_wavelength
|
37
37
|
|
38
38
|
self._kernel_initializer = keras.initializers.get(
|
39
39
|
clone_initializer(kernel_initializer)
|
40
40
|
)
|
41
41
|
|
42
|
-
self.
|
42
|
+
self.rope_scaling_factor = rope_scaling_factor
|
43
43
|
|
44
44
|
def build(self, inputs_shape):
|
45
45
|
# Einsum variables:
|
@@ -51,12 +51,12 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
51
51
|
# v = num key/value heads
|
52
52
|
# h = head dim
|
53
53
|
self._hidden_dim = inputs_shape[-1]
|
54
|
-
self._head_dim = self._hidden_dim // self.
|
54
|
+
self._head_dim = self._hidden_dim // self.num_query_heads
|
55
55
|
self._inv_norm_factor = 1.0 / math.sqrt(self._head_dim)
|
56
56
|
|
57
57
|
self.query_dense = keras.layers.EinsumDense(
|
58
58
|
equation="bqm,muh->bquh",
|
59
|
-
output_shape=(None, self.
|
59
|
+
output_shape=(None, self.num_query_heads, self._head_dim),
|
60
60
|
kernel_initializer=self._kernel_initializer,
|
61
61
|
dtype=self.dtype_policy,
|
62
62
|
name="query",
|
@@ -67,7 +67,7 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
67
67
|
equation="bkm,mvh->bkvh",
|
68
68
|
output_shape=(
|
69
69
|
None,
|
70
|
-
self.
|
70
|
+
self.num_key_value_heads,
|
71
71
|
self._head_dim,
|
72
72
|
),
|
73
73
|
kernel_initializer=self._kernel_initializer,
|
@@ -80,7 +80,7 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
80
80
|
equation="bkm,mvh->bkvh",
|
81
81
|
output_shape=(
|
82
82
|
None,
|
83
|
-
self.
|
83
|
+
self.num_key_value_heads,
|
84
84
|
self._head_dim,
|
85
85
|
),
|
86
86
|
kernel_initializer=self._kernel_initializer,
|
@@ -89,31 +89,31 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
89
89
|
)
|
90
90
|
self.value_dense.build(inputs_shape)
|
91
91
|
|
92
|
-
self.
|
92
|
+
self.softmax = keras.layers.Softmax(
|
93
93
|
axis=-1,
|
94
94
|
dtype="float32",
|
95
95
|
name="attention_softmax",
|
96
96
|
)
|
97
97
|
|
98
|
-
self.
|
99
|
-
rate=self.
|
98
|
+
self.dropout_layer = keras.layers.Dropout(
|
99
|
+
rate=self.dropout,
|
100
100
|
dtype=self.dtype_policy,
|
101
101
|
)
|
102
102
|
|
103
|
-
self.
|
103
|
+
self.output_dense = keras.layers.EinsumDense(
|
104
104
|
equation="bquh,uhm->bqm",
|
105
105
|
output_shape=(None, self._hidden_dim),
|
106
106
|
kernel_initializer=self._kernel_initializer,
|
107
107
|
dtype=self.dtype_policy,
|
108
108
|
name="attention_output",
|
109
109
|
)
|
110
|
-
self.
|
111
|
-
(None, None, self.
|
110
|
+
self.output_dense.build(
|
111
|
+
(None, None, self.num_query_heads, self._head_dim)
|
112
112
|
)
|
113
113
|
|
114
114
|
self.rotary_embedding_layer = RotaryEmbedding(
|
115
|
-
max_wavelength=self.
|
116
|
-
scaling_factor=self.
|
115
|
+
max_wavelength=self.rope_max_wavelength,
|
116
|
+
scaling_factor=self.rope_scaling_factor,
|
117
117
|
dtype=self.dtype_policy,
|
118
118
|
)
|
119
119
|
|
@@ -168,18 +168,18 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
168
168
|
|
169
169
|
# [batch_shape, seq_len, num_key_value_heads, head_dim]
|
170
170
|
# -> [batch_shape, seq_len, num_heads, head_dim]
|
171
|
-
key = ops.repeat(key, repeats=self.
|
172
|
-
value = ops.repeat(value, repeats=self.
|
171
|
+
key = ops.repeat(key, repeats=self.num_key_value_groups, axis=2)
|
172
|
+
value = ops.repeat(value, repeats=self.num_key_value_groups, axis=2)
|
173
173
|
|
174
174
|
attention_output = self._compute_attention(
|
175
175
|
query, key, value, attention_mask
|
176
176
|
)
|
177
177
|
|
178
|
-
attention_output = self.
|
178
|
+
attention_output = self.dropout_layer(
|
179
179
|
attention_output, training=training
|
180
180
|
)
|
181
181
|
|
182
|
-
attention_output = self.
|
182
|
+
attention_output = self.output_dense(attention_output)
|
183
183
|
|
184
184
|
if cache is not None:
|
185
185
|
return attention_output, cache
|
@@ -187,10 +187,8 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
187
187
|
|
188
188
|
def _masked_softmax(self, attention_scores, attention_mask=None):
|
189
189
|
if attention_mask is not None:
|
190
|
-
return self.
|
191
|
-
|
192
|
-
)
|
193
|
-
return self._softmax(attention_scores)
|
190
|
+
return self.softmax(attention_scores, attention_mask[:, None, :, :])
|
191
|
+
return self.softmax(attention_scores)
|
194
192
|
|
195
193
|
def _use_fused_attention_op(self):
|
196
194
|
if not fused_attention_op_available():
|
@@ -198,9 +196,6 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
198
196
|
if self.dropout > 0.0:
|
199
197
|
return False
|
200
198
|
if running_on_gpu():
|
201
|
-
# GPU never supports softcap in the fused op.
|
202
|
-
if self.logit_soft_cap is not None:
|
203
|
-
return False
|
204
199
|
return gpu_supports_fused_attention_op()
|
205
200
|
elif running_on_tpu():
|
206
201
|
# TPU supports softcap with on keras >= 3.10.
|
@@ -215,18 +210,12 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
215
210
|
attention_mask = ops.expand_dims(attention_mask, axis=1)
|
216
211
|
attention_mask = ops.cast(attention_mask, dtype="bool")
|
217
212
|
|
218
|
-
if self.logit_soft_cap:
|
219
|
-
kwargs = {"attn_logits_soft_cap": self.logit_soft_cap}
|
220
|
-
else:
|
221
|
-
kwargs = {}
|
222
|
-
|
223
213
|
attention_output = ops.dot_product_attention(
|
224
214
|
query,
|
225
215
|
key,
|
226
216
|
value,
|
227
217
|
mask=attention_mask,
|
228
218
|
scale=self._inv_norm_factor,
|
229
|
-
**kwargs,
|
230
219
|
)
|
231
220
|
return attention_output
|
232
221
|
|
@@ -249,15 +238,15 @@ class CachedMixtralAttention(keras.layers.Layer):
|
|
249
238
|
config = super().get_config()
|
250
239
|
config.update(
|
251
240
|
{
|
252
|
-
"num_query_heads": self.
|
253
|
-
"num_key_value_heads": self.
|
254
|
-
"rope_max_wavelength": self.
|
255
|
-
"rope_scaling_factor": self.
|
241
|
+
"num_query_heads": self.num_query_heads,
|
242
|
+
"num_key_value_heads": self.num_key_value_heads,
|
243
|
+
"rope_max_wavelength": self.rope_max_wavelength,
|
244
|
+
"rope_scaling_factor": self.rope_scaling_factor,
|
256
245
|
"kernel_initializer": keras.initializers.serialize(
|
257
246
|
self._kernel_initializer
|
258
247
|
),
|
259
|
-
"sliding_window": self.
|
260
|
-
"dropout": self.
|
248
|
+
"sliding_window": self.sliding_window,
|
249
|
+
"dropout": self.dropout,
|
261
250
|
}
|
262
251
|
)
|
263
252
|
return config
|
@@ -10,7 +10,7 @@ backbone_presets = {
|
|
10
10
|
"params": 46702792704,
|
11
11
|
"path": "mixtral",
|
12
12
|
},
|
13
|
-
"kaggle_handle": "kaggle://keras/mixtral/keras/mixtral_8_7b_en",
|
13
|
+
"kaggle_handle": "kaggle://keras/mixtral/keras/mixtral_8_7b_en/3",
|
14
14
|
},
|
15
15
|
"mixtral_8_instruct_7b_en": {
|
16
16
|
"metadata": {
|
@@ -21,6 +21,6 @@ backbone_presets = {
|
|
21
21
|
"params": 46702792704,
|
22
22
|
"path": "mixtral",
|
23
23
|
},
|
24
|
-
"kaggle_handle": "kaggle://keras/mixtral/keras/mixtral_8_instruct_7b_en",
|
24
|
+
"kaggle_handle": "kaggle://keras/mixtral/keras/mixtral_8_instruct_7b_en/3",
|
25
25
|
},
|
26
26
|
}
|
@@ -9,7 +9,7 @@ backbone_presets = {
|
|
9
9
|
"params": 27092736,
|
10
10
|
"path": "moonshine",
|
11
11
|
},
|
12
|
-
"kaggle_handle": "kaggle://keras/moonshine/Keras/moonshine_tiny_en",
|
12
|
+
"kaggle_handle": "kaggle://keras/moonshine/Keras/moonshine_tiny_en/1",
|
13
13
|
},
|
14
14
|
"moonshine_base_en": {
|
15
15
|
"metadata": {
|
@@ -20,6 +20,6 @@ backbone_presets = {
|
|
20
20
|
"params": 61513920,
|
21
21
|
"path": "moonshine",
|
22
22
|
},
|
23
|
-
"kaggle_handle": "kaggle://keras/moonshine/Keras/moonshine_base_en",
|
23
|
+
"kaggle_handle": "kaggle://keras/moonshine/Keras/moonshine_base_en/1",
|
24
24
|
},
|
25
25
|
}
|
@@ -81,7 +81,7 @@ backbone_presets = {
|
|
81
81
|
"path": "pali_gemma2",
|
82
82
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
83
83
|
},
|
84
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_ft_docci_10b_448/
|
84
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_ft_docci_10b_448/3",
|
85
85
|
},
|
86
86
|
"pali_gemma2_mix_3b_224": {
|
87
87
|
"metadata": {
|
@@ -126,7 +126,7 @@ backbone_presets = {
|
|
126
126
|
"path": "pali_gemma2",
|
127
127
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
128
128
|
},
|
129
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_224/
|
129
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_224/3",
|
130
130
|
},
|
131
131
|
"pali_gemma2_mix_10b_448": {
|
132
132
|
"metadata": {
|
@@ -141,7 +141,7 @@ backbone_presets = {
|
|
141
141
|
"path": "pali_gemma2",
|
142
142
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
143
143
|
},
|
144
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_448/
|
144
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_mix_10b_448/3",
|
145
145
|
},
|
146
146
|
"pali_gemma2_mix_28b_224": {
|
147
147
|
"metadata": {
|
@@ -156,7 +156,7 @@ backbone_presets = {
|
|
156
156
|
"path": "pali_gemma2",
|
157
157
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
158
158
|
},
|
159
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_224/
|
159
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_224/3",
|
160
160
|
},
|
161
161
|
"pali_gemma2_mix_28b_448": {
|
162
162
|
"metadata": {
|
@@ -171,7 +171,7 @@ backbone_presets = {
|
|
171
171
|
"path": "pali_gemma2",
|
172
172
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
173
173
|
},
|
174
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_448/
|
174
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_28b_mix_448/3",
|
175
175
|
},
|
176
176
|
"pali_gemma2_pt_3b_224": {
|
177
177
|
"metadata": {
|
@@ -231,7 +231,7 @@ backbone_presets = {
|
|
231
231
|
"path": "pali_gemma2",
|
232
232
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
233
233
|
},
|
234
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_224/
|
234
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_224/3",
|
235
235
|
},
|
236
236
|
"pali_gemma2_pt_10b_448": {
|
237
237
|
"metadata": {
|
@@ -246,7 +246,7 @@ backbone_presets = {
|
|
246
246
|
"path": "pali_gemma2",
|
247
247
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
248
248
|
},
|
249
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_448/
|
249
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_448/3",
|
250
250
|
},
|
251
251
|
"pali_gemma2_pt_10b_896": {
|
252
252
|
"metadata": {
|
@@ -261,7 +261,7 @@ backbone_presets = {
|
|
261
261
|
"path": "pali_gemma2",
|
262
262
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
263
263
|
},
|
264
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_896/
|
264
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_10b_896/3",
|
265
265
|
},
|
266
266
|
"pali_gemma2_pt_28b_224": {
|
267
267
|
"metadata": {
|
@@ -276,7 +276,7 @@ backbone_presets = {
|
|
276
276
|
"path": "pali_gemma2",
|
277
277
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
278
278
|
},
|
279
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_224/
|
279
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_224/4",
|
280
280
|
},
|
281
281
|
"pali_gemma2_pt_28b_448": {
|
282
282
|
"metadata": {
|
@@ -291,7 +291,7 @@ backbone_presets = {
|
|
291
291
|
"path": "pali_gemma2",
|
292
292
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
293
293
|
},
|
294
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_448/
|
294
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_448/3",
|
295
295
|
},
|
296
296
|
"pali_gemma2_pt_28b_896": {
|
297
297
|
"metadata": {
|
@@ -306,6 +306,6 @@ backbone_presets = {
|
|
306
306
|
"path": "pali_gemma2",
|
307
307
|
"model_card": "https://www.kaggle.com/models/google/paligemma-2",
|
308
308
|
},
|
309
|
-
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_896/
|
309
|
+
"kaggle_handle": "kaggle://keras/paligemma2/keras/pali_gemma2_pt_28b_896/3",
|
310
310
|
},
|
311
311
|
}
|
@@ -7,7 +7,7 @@ backbone_presets = {
|
|
7
7
|
"params": 494032768,
|
8
8
|
"path": "qwen",
|
9
9
|
},
|
10
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_0.5b_en",
|
10
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_0.5b_en/1",
|
11
11
|
},
|
12
12
|
"qwen2.5_3b_en": {
|
13
13
|
"metadata": {
|
@@ -15,7 +15,7 @@ backbone_presets = {
|
|
15
15
|
"params": 3085938688,
|
16
16
|
"path": "qwen",
|
17
17
|
},
|
18
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_3b_en",
|
18
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_3b_en/1",
|
19
19
|
},
|
20
20
|
"qwen2.5_7b_en": {
|
21
21
|
"metadata": {
|
@@ -23,7 +23,7 @@ backbone_presets = {
|
|
23
23
|
"params": 6993420288,
|
24
24
|
"path": "qwen",
|
25
25
|
},
|
26
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_7b_en/
|
26
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_7b_en/3",
|
27
27
|
},
|
28
28
|
"qwen2.5_instruct_0.5b_en": {
|
29
29
|
"metadata": {
|
@@ -34,7 +34,7 @@ backbone_presets = {
|
|
34
34
|
"params": 494032768,
|
35
35
|
"path": "qwen",
|
36
36
|
},
|
37
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_0.5b_en",
|
37
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_0.5b_en/1",
|
38
38
|
},
|
39
39
|
"qwen2.5_instruct_32b_en": {
|
40
40
|
"metadata": {
|
@@ -45,7 +45,7 @@ backbone_presets = {
|
|
45
45
|
"params": 32763876352,
|
46
46
|
"path": "qwen",
|
47
47
|
},
|
48
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_32b_en",
|
48
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_32b_en/2",
|
49
49
|
},
|
50
50
|
"qwen2.5_instruct_72b_en": {
|
51
51
|
"metadata": {
|
@@ -56,6 +56,6 @@ backbone_presets = {
|
|
56
56
|
"params": 72706203648,
|
57
57
|
"path": "qwen",
|
58
58
|
},
|
59
|
-
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_72b_en",
|
59
|
+
"kaggle_handle": "kaggle://keras/qwen/keras/qwen2.5_instruct_72b_en/2",
|
60
60
|
},
|
61
61
|
}
|
@@ -256,9 +256,6 @@ class QwenMoeAttention(keras.layers.Layer):
|
|
256
256
|
if self.dropout > 0.0:
|
257
257
|
return False
|
258
258
|
if running_on_gpu():
|
259
|
-
# GPU never supports softcap in the fused op.
|
260
|
-
if self.logit_soft_cap is not None:
|
261
|
-
return False
|
262
259
|
return gpu_supports_fused_attention_op()
|
263
260
|
elif running_on_tpu():
|
264
261
|
# TPU supports softcap with on keras >= 3.10.
|
@@ -268,7 +265,13 @@ class QwenMoeAttention(keras.layers.Layer):
|
|
268
265
|
return False
|
269
266
|
|
270
267
|
def _compute_attention(
|
271
|
-
self,
|
268
|
+
self,
|
269
|
+
query,
|
270
|
+
key,
|
271
|
+
value,
|
272
|
+
attention_mask=None,
|
273
|
+
cache_update_index=None,
|
274
|
+
**kwargs,
|
272
275
|
):
|
273
276
|
"""Computes attention using query, key, and value tensors.
|
274
277
|
|
@@ -289,11 +292,6 @@ class QwenMoeAttention(keras.layers.Layer):
|
|
289
292
|
attention_mask = ops.expand_dims(attention_mask, axis=1)
|
290
293
|
attention_mask = ops.cast(attention_mask, dtype="bool")
|
291
294
|
|
292
|
-
if self.logit_soft_cap:
|
293
|
-
kwargs = {"attn_logits_soft_cap": self.logit_soft_cap}
|
294
|
-
else:
|
295
|
-
kwargs = {}
|
296
|
-
|
297
295
|
attention_output = ops.dot_product_attention(
|
298
296
|
query,
|
299
297
|
key,
|
@@ -68,7 +68,7 @@ def convert_weights(backbone, loader, transformers_config):
|
|
68
68
|
)
|
69
69
|
## Output
|
70
70
|
loader.port_weight(
|
71
|
-
keras_variable=decoder_layer._self_attention_layer.
|
71
|
+
keras_variable=decoder_layer._self_attention_layer.output_dense.kernel,
|
72
72
|
hf_weight_key=f"model.layers.{i}.self_attn.o_proj.weight",
|
73
73
|
hook_fn=transpose_and_reshape,
|
74
74
|
)
|
keras_hub/src/version.py
CHANGED
@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=itSzodVUeuX6HQnmsSXY0Wv-5Htbu397410R-SFW_4I,
|
|
5
5
|
keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
|
6
6
|
keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
|
8
|
-
keras_hub/src/version.py,sha256=
|
8
|
+
keras_hub/src/version.py,sha256=SjWdrHYDbNitBzSsMmxG-HvuuqsSB3ICvTQclkoX-Os,222
|
9
9
|
keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
|
@@ -189,7 +189,7 @@ keras_hub/src/models/gemma/gemma_backbone.py,sha256=GzAUSArw_pN9dtWQzTVhWDbW-XyW
|
|
189
189
|
keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
|
190
190
|
keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
|
191
191
|
keras_hub/src/models/gemma/gemma_decoder_block.py,sha256=f5UsRO-VNsKJfm_WHVJWK4UahhzYm3sKprJ8jjr-zm4,7628
|
192
|
-
keras_hub/src/models/gemma/gemma_presets.py,sha256=
|
192
|
+
keras_hub/src/models/gemma/gemma_presets.py,sha256=ZOZEZP3MaIn4-y5i0-QxNeAVtNoWvVYTAu96wvIFMpA,7178
|
193
193
|
keras_hub/src/models/gemma/gemma_tokenizer.py,sha256=FhcyNL4lo63MqOhTQPFr07-u3BddL0fVM4TmOm8ku-I,2622
|
194
194
|
keras_hub/src/models/gemma/rms_normalization.py,sha256=fku-JEo2sNy-ytX7ySD1sRzdhRAPmYex_z8oFk1NiG8,833
|
195
195
|
keras_hub/src/models/gemma3/__init__.py,sha256=oPFadkdK5DRLD6sYx83iTetY5daWuSzmJilLjokHcbU,257
|
@@ -200,7 +200,7 @@ keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=vjt4N-zr0Eb5
|
|
200
200
|
keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=6PLlpDxxF67stDv74fw9nNgUHBWmTLx6qGygJwyu5FY,10819
|
201
201
|
keras_hub/src/models/gemma3/gemma3_image_converter.py,sha256=czi5JrTyKiK0nFzvonviBIX8jjvLHqvGNA9RyheB31k,536
|
202
202
|
keras_hub/src/models/gemma3/gemma3_interleave_embeddings.py,sha256=_Q5hvhA93HAJe-A2IBRKVu0_RDVht61lFQiYse_9Rm4,4597
|
203
|
-
keras_hub/src/models/gemma3/gemma3_presets.py,sha256
|
203
|
+
keras_hub/src/models/gemma3/gemma3_presets.py,sha256=tVxug3rX3w_lqZlFfyqUlVdOrfBjN0GJY5ooBx1Fe0M,5124
|
204
204
|
keras_hub/src/models/gemma3/gemma3_tokenizer.py,sha256=ZaBclFIwzJkSXDuZMBQLHUKV8RWEdZ_dsJMvMcc3qXw,3215
|
205
205
|
keras_hub/src/models/gemma3/gemma3_vision_encoder.py,sha256=7XI0oBjIfJItV5w90t5bWb3C2KzjhvDnIC7wjIq4Cns,20850
|
206
206
|
keras_hub/src/models/gemma3/rms_normalization.py,sha256=fku-JEo2sNy-ytX7ySD1sRzdhRAPmYex_z8oFk1NiG8,833
|
@@ -225,14 +225,14 @@ keras_hub/src/models/llama/llama_causal_lm.py,sha256=9bP4-XDCMgsZuH1ILIMzmwq2Fyy
|
|
225
225
|
keras_hub/src/models/llama/llama_causal_lm_preprocessor.py,sha256=VTboOMiRBoxHrwP343upLUTsv3AG65r2H8h_PNPVphE,3047
|
226
226
|
keras_hub/src/models/llama/llama_decoder.py,sha256=CfWI8ru1-uWjDs0sL6H7g8ElYXWu6h7c5XIx-2Y8lX8,9668
|
227
227
|
keras_hub/src/models/llama/llama_layernorm.py,sha256=LfRbePHUJs00Ptf7dvNaw3Aj9n1xBMBpE_rS5zzsYMo,1050
|
228
|
-
keras_hub/src/models/llama/llama_presets.py,sha256=
|
228
|
+
keras_hub/src/models/llama/llama_presets.py,sha256=B-WwL4g0Oiml1pyVwQrfIwvjm8jyLlBvwEE-KvkXayU,1902
|
229
229
|
keras_hub/src/models/llama/llama_rotary_embedding.py,sha256=nqQGl7lFXJq7xGBfoONx2-wuuvKdoydnzUjy6FGQjwo,7300
|
230
230
|
keras_hub/src/models/llama/llama_tokenizer.py,sha256=NKWhxTutQ2jd6sd3NSTy9plQyKGCmuNG7U6kVxhZU4Y,1981
|
231
231
|
keras_hub/src/models/llama3/__init__.py,sha256=Vqvr2E10cnANkrRQGNBJtVLNAu-Bg9Lx6sqKOZWFy_8,257
|
232
232
|
keras_hub/src/models/llama3/llama3_backbone.py,sha256=TEocD8X7GihQFGJAz3jPwLCqDb86nyeZ1DqBF7RgQLE,3366
|
233
233
|
keras_hub/src/models/llama3/llama3_causal_lm.py,sha256=qk_onuf7S6d7rxAntilq2Q2orggMbPEJbNHJNVe2G0U,1541
|
234
234
|
keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py,sha256=twbXel9hsQgGxDAoQhEQuVm2udnEybI4fAQTJzXAuBs,3064
|
235
|
-
keras_hub/src/models/llama3/llama3_presets.py,sha256
|
235
|
+
keras_hub/src/models/llama3/llama3_presets.py,sha256=m5WEWOKm58wp7w_SDhYFVv3XhdY6d0GfSwxlbH07rwM,4302
|
236
236
|
keras_hub/src/models/llama3/llama3_tokenizer.py,sha256=J-KxRc08vGs4olFw_4mtJs0W_dTeUyj_XxMycazBmxI,1934
|
237
237
|
keras_hub/src/models/mistral/__init__.py,sha256=vjBlzcrIsFSwJKnfwfTNMKstIEKGFTE3kVcdAdfwlnE,263
|
238
238
|
keras_hub/src/models/mistral/mistral_attention.py,sha256=nGDlD4NcIwIGlfbt3ArxdT5QAvamY7yiNEGDlTgWirU,8609
|
@@ -240,7 +240,7 @@ keras_hub/src/models/mistral/mistral_backbone.py,sha256=oatoqSX0z-xjKfXeSveL4P0D
|
|
240
240
|
keras_hub/src/models/mistral/mistral_causal_lm.py,sha256=ujCKfsbuYzr8VusqPYcnTH6rTb0MRfzsinEraVhQksc,13234
|
241
241
|
keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py,sha256=_4qq-uKktfIg_i081ZWjZGEIYZpedBwtBGpchQQ-qEk,3079
|
242
242
|
keras_hub/src/models/mistral/mistral_layer_norm.py,sha256=nimMZ5CTPK8v9eflfrGuzqmv-2vd2rGlPvcHOMwYZyg,1063
|
243
|
-
keras_hub/src/models/mistral/mistral_presets.py,sha256=
|
243
|
+
keras_hub/src/models/mistral/mistral_presets.py,sha256=ggWQwKGDMFPzUWkQIJ6Tlk7NS-dClRO95WoSTaImL9s,939
|
244
244
|
keras_hub/src/models/mistral/mistral_tokenizer.py,sha256=wyzR_Y2XwrDiBV3jIeBChSPiaOkVVaxFuLxMH2F6EYA,2005
|
245
245
|
keras_hub/src/models/mistral/mistral_transformer_decoder.py,sha256=z5FCh9TEaznvhW3JOSKmFTotRbiuQhzJTZClW2m9sEw,9556
|
246
246
|
keras_hub/src/models/mit/__init__.py,sha256=F70_0PR_nPzPdMI8XOpXDRR_nxclGjcHv3iWSWUX3w8,316
|
@@ -250,13 +250,13 @@ keras_hub/src/models/mit/mit_image_classifier_preprocessor.py,sha256=oNYs-pUK8Vn
|
|
250
250
|
keras_hub/src/models/mit/mit_image_converter.py,sha256=Mw7nV-OzyBveGuZUNFsPPKyq9jXJVW2_cVH024CNkXM,311
|
251
251
|
keras_hub/src/models/mit/mit_layers.py,sha256=HUJO5uhJ6jgwANpwbQdPlEVwLRVb3BZQ-Ftjg3B9XvY,9734
|
252
252
|
keras_hub/src/models/mit/mit_presets.py,sha256=ooLrh2OoGZKxnCGnhB6BynYJtVCXH7nDDFhgQRWt36U,4528
|
253
|
-
keras_hub/src/models/mixtral/mixtral_attention.py,sha256=
|
253
|
+
keras_hub/src/models/mixtral/mixtral_attention.py,sha256=f5aiTtstWeKG_ZwumAlYIzjIN08CpnxNdenxWNJSwZw,8713
|
254
254
|
keras_hub/src/models/mixtral/mixtral_backbone.py,sha256=vUAFXvqwVBgKxYbOsqIHzPN59bhaDrGWwOnBCzeUtt0,8034
|
255
255
|
keras_hub/src/models/mixtral/mixtral_causal_lm.py,sha256=JA1t6xTeaYX_fNo9ftRyvzdRDG3vndC-Rlwn5fnsbQo,12001
|
256
256
|
keras_hub/src/models/mixtral/mixtral_causal_lm_preprocessor.py,sha256=q2qXa9QAUWBvOWv9DeNvwsBNXSORJAbQFoQsWQ7e8V8,3079
|
257
257
|
keras_hub/src/models/mixtral/mixtral_decoder.py,sha256=CvOjhTxPnGQ_HNknZXRI6Cx1kpuHG99_TiOh-mNcsDw,18190
|
258
258
|
keras_hub/src/models/mixtral/mixtral_layer_norm.py,sha256=zfbDKZEb45FTwP0zQd7WPPp8tuiGoSNfS-DRYWkZyWw,1031
|
259
|
-
keras_hub/src/models/mixtral/mixtral_presets.py,sha256=
|
259
|
+
keras_hub/src/models/mixtral/mixtral_presets.py,sha256=AteLrYXyVjooz_DHLnBA1OMlZS6LMu7Y7gGUWddn6go,856
|
260
260
|
keras_hub/src/models/mixtral/mixtral_tokenizer.py,sha256=Kc233k879QMyX164X_CzWbqpnqEkKWNqa648guTGkBk,661
|
261
261
|
keras_hub/src/models/mobilenet/__init__.py,sha256=hxkNGGj_iAMu62iooUDEPA818sNOIgjG7pXMLEMOsAE,275
|
262
262
|
keras_hub/src/models/mobilenet/mobilenet_backbone.py,sha256=aZBSFeLUObYYoi3od9DI1KfgPCqh5GHTcAI8Y2ZHShA,29536
|
@@ -274,7 +274,7 @@ keras_hub/src/models/moonshine/moonshine_decoder.py,sha256=Exf5Gg1gsCBST53wxOgBe
|
|
274
274
|
keras_hub/src/models/moonshine/moonshine_encoder.py,sha256=NjjMO_FEBlWFSv6Appv8a3V7XovW2afvxxjXwQRgV60,8148
|
275
275
|
keras_hub/src/models/moonshine/moonshine_layers.py,sha256=EIiIMz-UK1nikrC7iusGqjb3jcvmu6VdNcnhWAQHs_M,9538
|
276
276
|
keras_hub/src/models/moonshine/moonshine_multi_head_attention.py,sha256=YaxWxdywUyOQDW-KSX9DqXkX0ttGL-p1hRtWuAnlMaE,13598
|
277
|
-
keras_hub/src/models/moonshine/moonshine_presets.py,sha256=
|
277
|
+
keras_hub/src/models/moonshine/moonshine_presets.py,sha256=oqawiALSEwZVUhGejyprF4r1009k8930bz3EWJ6YpU8,876
|
278
278
|
keras_hub/src/models/moonshine/moonshine_tokenizer.py,sha256=grD-x4hMZDJYEyxvCyV-FYvUFInYsUI08-vnBKLAl5Y,2215
|
279
279
|
keras_hub/src/models/opt/__init__.py,sha256=6Ybj8etxNaPsVcuZvaeHnKB3As92Px--dbiFAqOCIT0,239
|
280
280
|
keras_hub/src/models/opt/opt_backbone.py,sha256=mK5z_E5mSiIX5s0w4hr4IVQpT7K46W2ajZBmuMjxwaY,5873
|
@@ -288,7 +288,7 @@ keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7
|
|
288
288
|
keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
|
289
289
|
keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=24ABQ1vGlppV-KfWh0YqJjzM_Lu2GIwvyJ4X2XXie_A,5616
|
290
290
|
keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py,sha256=5yM_jUtrFsWIieiwfFBoP7mtPmQAwywkeLKbd7fhmzk,371
|
291
|
-
keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=
|
291
|
+
keras_hub/src/models/pali_gemma/pali_gemma_presets.py,sha256=DAaSzquR4_AnSjToDjgXj2zbrT5skUpXmzKoyATwwHk,13006
|
292
292
|
keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py,sha256=ljTiADHo0Ok88q-jVzwJIle2C8xcxnudLTsBLzIySaM,2415
|
293
293
|
keras_hub/src/models/pali_gemma/pali_gemma_vit.py,sha256=SbWanwCoONSwgiWQsc6lFdvhqKZ-zDW42XzQt8CNMtU,18311
|
294
294
|
keras_hub/src/models/phi3/__init__.py,sha256=zIbf1MU-ks91mEkjTRJAsk51N3BBnXDF2JM1vO-13PQ,245
|
@@ -308,16 +308,16 @@ keras_hub/src/models/qwen/qwen_causal_lm.py,sha256=_f-UHaKHp0ncxknpkpEJiW3jlng3E
|
|
308
308
|
keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py,sha256=Va-4TLJD3ycEnkS41rF3dVj4_6K0j-gxLTrREFRcyr0,609
|
309
309
|
keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSwOARG0ote-jAg,11771
|
310
310
|
keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
|
311
|
-
keras_hub/src/models/qwen/qwen_presets.py,sha256=
|
311
|
+
keras_hub/src/models/qwen/qwen_presets.py,sha256=DpRplWNwktM4KDgIP495PTUBJxQE_mS6KQSK5LGWOyc,1998
|
312
312
|
keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
|
313
313
|
keras_hub/src/models/qwen_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
|
-
keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=
|
314
|
+
keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=pE79_iHUm2LGkoWL6zMJw_pNfzIvmyq3yJaiq47W2TY,13242
|
315
315
|
keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
|
316
316
|
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
|
317
317
|
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=uKaXRrJs02vkVudjdehzJPp0B84tPMkxNHlp166kceE,589
|
318
318
|
keras_hub/src/models/qwen_moe/qwen_moe_decoder.py,sha256=kmUjLpYTbJQ3J_31qWhLOd0Dg2_9cl_JX_zM8ZMH1Qo,23130
|
319
319
|
keras_hub/src/models/qwen_moe/qwen_moe_layernorm.py,sha256=DbkWJo7U0-cwdZwHPeAnFznYwtao6o0fjpoDJ9UWnpc,927
|
320
|
-
keras_hub/src/models/qwen_moe/qwen_moe_presets.py,sha256=
|
320
|
+
keras_hub/src/models/qwen_moe/qwen_moe_presets.py,sha256=uKrA9xLV3P3jtYUUsqdhKq_HPkB4lXmOYseB1wXTZnI,457
|
321
321
|
keras_hub/src/models/qwen_moe/qwen_moe_tokenizer.py,sha256=2c3X8jNGO0q0UL5NtUqSgHWLqhyJGi2ohNcTeOGhd84,1407
|
322
322
|
keras_hub/src/models/resnet/__init__.py,sha256=C5UqlQ6apm8WSp1bnrxB6Bi3BGaknxRQs-r3b2wpaGA,257
|
323
323
|
keras_hub/src/models/resnet/resnet_backbone.py,sha256=Q7nlqcTXZzjqd0e-DsjHC4ok58yOX7qxseotym3uZpM,31276
|
@@ -490,7 +490,7 @@ keras_hub/src/utils/transformers/convert_gemma.py,sha256=ElCgwBpSN5Q7rV5PJawTsoy
|
|
490
490
|
keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1watPVpIBF8ujS8pGbBOWc,5703
|
491
491
|
keras_hub/src/utils/transformers/convert_llama3.py,sha256=c5phNl-QayQ_BS0s-lenbu6oHxqfwDShKJoh9DluxUU,6146
|
492
492
|
keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
|
493
|
-
keras_hub/src/utils/transformers/convert_mixtral.py,sha256=
|
493
|
+
keras_hub/src/utils/transformers/convert_mixtral.py,sha256=PxeCY8Xe7U_caICugwOCEjuSZ51ZUtmef6rUxh-Wt54,5508
|
494
494
|
keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
|
495
495
|
keras_hub/src/utils/transformers/convert_qwen.py,sha256=WUxMAEFVqRs7TRw7QU5TH3_ev4yf02R1xFVliMvTQqg,5886
|
496
496
|
keras_hub/src/utils/transformers/convert_qwen_moe.py,sha256=a7R28aln-PdAcNuKAXdrtzvslho2Co6GypChxLMKPpc,10618
|
@@ -499,7 +499,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=1nfS5xVsl-JROGXJXltTqV1
|
|
499
499
|
keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
|
500
500
|
keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
|
501
501
|
keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
|
502
|
-
keras_hub_nightly-0.21.0.
|
503
|
-
keras_hub_nightly-0.21.0.
|
504
|
-
keras_hub_nightly-0.21.0.
|
505
|
-
keras_hub_nightly-0.21.0.
|
502
|
+
keras_hub_nightly-0.21.0.dev202505230409.dist-info/METADATA,sha256=i-P2LqVLiVN0cIr63OvwxpAmqn2sGBPSUHDqbHiFhcg,7393
|
503
|
+
keras_hub_nightly-0.21.0.dev202505230409.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
504
|
+
keras_hub_nightly-0.21.0.dev202505230409.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
|
505
|
+
keras_hub_nightly-0.21.0.dev202505230409.dist-info/RECORD,,
|