keras-hub-nightly 0.20.0.dev202503170356__py3-none-any.whl → 0.20.0.dev202503180354__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
@@ -0,0 +1,311 @@
+ import keras
+ from keras import ops
+
+ from keras_hub.src.layers.modeling.transformer_layer_utils import (
+     compute_causal_mask,
+ )
+ from keras_hub.src.layers.modeling.transformer_layer_utils import (
+     merge_padding_and_attention_mask,
+ )
+ from keras_hub.src.models.qwen.qwen_attention import QwenAttention
+ from keras_hub.src.models.qwen.qwen_layernorm import QwenLayerNorm
+ from keras_hub.src.utils.keras_utils import clone_initializer
+
+
+ class QwenTransformerDecoder(keras.layers.Layer):
+     """A Transformer decoder layer for the Qwen backbone.
+
+     This layer implements a Transformer decoder block that includes
+     self-attention with optional sliding window attention and a feed-forward
+     network.
+
+     Args:
+         intermediate_dim: Output dimension of the first dense layer in the
+             feed-forward network.
+         num_query_heads: Number of query attention heads.
+         num_key_value_heads: Number of key/value attention heads (for GQA).
+         rope_max_wavelength: Maximum wavelength for RoPE (Rotary Position
+             Embedding).
+         rope_scaling_factor: Scaling factor for RoPE, used for extending
+             context length.
+         activation: Activation function to use in the feed-forward network.
+         layer_norm_epsilon: Small float added to variance to avoid dividing
+             by zero in layer norm.
+         kernel_initializer: Initializer for the kernel weights.
+         dropout: Dropout rate for attention and hidden layers.
+         use_sliding_window_attention: Whether to use sliding window
+             attention.
+         sliding_window_size: Size of the sliding window for attention when
+             enabled.
+         **kwargs: Additional keyword arguments to pass to the Layer.
+     """
+
+     def __init__(
+         self,
+         intermediate_dim,
+         num_query_heads,
+         num_key_value_heads,
+         rope_max_wavelength=10000,
+         rope_scaling_factor=1.0,
+         activation="silu",
+         layer_norm_epsilon=1e-5,
+         kernel_initializer="glorot_uniform",
+         dropout=0,
+         use_sliding_window_attention=False,
+         sliding_window_size=4096,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.intermediate_dim = intermediate_dim
+         self.num_query_heads = num_query_heads
+         self.num_key_value_heads = num_key_value_heads
+
+         self.rope_max_wavelength = rope_max_wavelength
+         self.rope_scaling_factor = rope_scaling_factor
+
+         self.dropout = dropout
+
+         self.use_sliding_window_attention = use_sliding_window_attention
+         self.sliding_window_size = sliding_window_size
+
+         self.activation = keras.activations.get(activation)
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.kernel_initializer = keras.initializers.get(kernel_initializer)
+
+         self.supports_masking = True
+
+     def build(self, decoder_sequence_shape):
+         self._decoder_sequence_shape = decoder_sequence_shape
+         self.hidden_dim = decoder_sequence_shape[-1]
+
+         # Self attention layer.
+         self._self_attention_layer = QwenAttention(
+             num_query_heads=self.num_query_heads,
+             num_key_value_heads=self.num_key_value_heads,
+             rope_max_wavelength=self.rope_max_wavelength,
+             rope_scaling_factor=self.rope_scaling_factor,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             dropout=self.dropout,
+             use_sliding_window_attention=self.use_sliding_window_attention,
+             sliding_window_size=self.sliding_window_size,
+             dtype=self.dtype_policy,
+             name="self_attention",
+         )
+         self._self_attention_layer.build(decoder_sequence_shape)
+
+         self._self_attention_layernorm = QwenLayerNorm(
+             epsilon=self.layer_norm_epsilon,
+             dtype=self.dtype_policy,
+             name="self_attention_layernorm",
+         )
+
+         self._self_attention_layernorm.build(decoder_sequence_shape)
+         self._self_attention_dropout = keras.layers.Dropout(
+             rate=self.dropout,
+             dtype=self.dtype_policy,
+             name="self_attention_dropout",
+         )
+
+         # Feedforward layers.
+         self._feedforward_intermediate_dense = keras.layers.Dense(
+             self.intermediate_dim,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             use_bias=False,
+             dtype=self.dtype_policy,
+             name="feedforward_intermediate_dense",
+         )
+         self._feedforward_intermediate_dense.build(decoder_sequence_shape)
+
+         self._feedforward_gate_dense = keras.layers.Dense(
+             self.intermediate_dim,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             use_bias=False,
+             dtype=self.dtype_policy,
+             name="feedforward_gate_dense",
+         )
+         self._feedforward_gate_dense.build(decoder_sequence_shape)
+
+         self._feedforward_output_dense = keras.layers.Dense(
+             self.hidden_dim,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             use_bias=False,
+             dtype=self.dtype_policy,
+             name="feedforward_output_dense",
+         )
+
+         self._feedforward_output_dense.build(
+             self._feedforward_gate_dense.compute_output_shape(
+                 decoder_sequence_shape
+             )
+         )
+
+         self._feedforward_layernorm = QwenLayerNorm(
+             epsilon=self.layer_norm_epsilon,
+             dtype=self.dtype_policy,
+             name="feedforward_layernorm",
+         )
+         self._feedforward_layernorm.build(decoder_sequence_shape)
+
+         self.built = True
+
+     def call(
+         self,
+         decoder_sequence,
+         decoder_padding_mask=None,
+         decoder_attention_mask=None,
+         self_attention_cache=None,
+         self_attention_cache_update_index=None,
+         training=None,
+     ):
+         """Forward pass for the decoder layer.
+
+         Args:
+             decoder_sequence: Input tensor of shape [batch_size, seq_length,
+                 hidden_size].
+             decoder_padding_mask: Mask tensor for padding tokens.
+             decoder_attention_mask: Additional attention mask.
+             self_attention_cache: Optional cached key and value tensors for
+                 self-attention.
+             self_attention_cache_update_index: Index at which to update the
+                 cache.
+             training: Boolean indicating whether in training mode.
+
+         Returns:
+             decoder_output: Output tensor after applying transformer decoder
+                 block.
+             self_attention_cache: Updated cache tensors (if cache is provided).
+         """
+         self_attention_mask = self._compute_self_attention_mask(
+             decoder_sequence=decoder_sequence,
+             decoder_padding_mask=decoder_padding_mask,
+             decoder_attention_mask=decoder_attention_mask,
+             self_attention_cache=self_attention_cache,
+             self_attention_cache_update_index=self_attention_cache_update_index,
+         )
+         residual = decoder_sequence
+
+         x = self._self_attention_layernorm(decoder_sequence)
+
+         # Self attention block.
+         x = self._self_attention_layer(
+             hidden_states=x,
+             attention_mask=self_attention_mask,
+             cache=self_attention_cache,
+             cache_update_index=self_attention_cache_update_index,
+         )
+
+         if self_attention_cache is not None:
+             x, self_attention_cache = x
+
+         x = self._self_attention_dropout(x, training=training)
+
+         x = x + residual
+         residual = x
+
+         x = self._feedforward_layernorm(x)
+         gate_output = self._feedforward_gate_dense(x)
+
+         # Note that we run the activation function in full 32-bit
+         # precision since this is what `torch.nn.functional.silu`
+         # does. Internally, `torch.nn.functional.silu` converts the
+         # inputs to float32, computes SiLU, and converts the outputs
+         # back to compute dtype.
+         # CPU Kernel: https://github.com/pytorch/pytorch/blob/35c493f2cf9b623bfdc7e6b34dc1cb39690a7919/aten/src/ATen/native/cpu/Activation.cpp#L1221-L1235 # noqa: E501
+         # CUDA Kernel: https://github.com/pytorch/pytorch/blob/35c493f2cf9b623bfdc7e6b34dc1cb39690a7919/aten/src/ATen/native/cuda/ActivationSiluKernel.cu # noqa: E501
+         gate_output = ops.cast(gate_output, "float32")
+         gate_output = self.activation(gate_output)
+         gate_output = ops.cast(gate_output, self.compute_dtype)
+
+         x = self._feedforward_intermediate_dense(x)
+
+         x = self._feedforward_output_dense(ops.multiply(x, gate_output))
+
+         decoder_output = x + residual
+
+         if self_attention_cache is not None:
+             return decoder_output, self_attention_cache
+         return decoder_output
+
+     def _compute_self_attention_mask(
+         self,
+         decoder_sequence,
+         decoder_padding_mask,
+         decoder_attention_mask,
+         self_attention_cache,
+         self_attention_cache_update_index,
+     ):
+         """Computes the self-attention mask combining causal, padding and
+         attention masks.
+
+         Args:
+             decoder_sequence: Input tensor.
+             decoder_padding_mask: Mask tensor for padding tokens.
+             decoder_attention_mask: Additional attention mask.
+             self_attention_cache: Optional cached key and value tensors.
+             self_attention_cache_update_index: Index at which to update the
+                 cache.
+
+         Returns:
+             Combined attention mask tensor.
+         """
+         decoder_mask = merge_padding_and_attention_mask(
+             decoder_sequence, decoder_padding_mask, decoder_attention_mask
+         )
+         batch_size = ops.shape(decoder_sequence)[0]
+         input_length = output_length = ops.shape(decoder_sequence)[1]
+         # We need to handle a rectangular causal mask when doing cached
+         # decoding. For generative inference, `decoder_sequence` will
+         # generally be length 1, and `cache` will be the full generation length.
+         if self_attention_cache is not None:
+             input_length = ops.shape(self_attention_cache)[2]
+
+         cache_update_index = (
+             0
+             if self_attention_cache_update_index is None
+             else self_attention_cache_update_index
+         )
+
+         causal_mask = compute_causal_mask(
+             batch_size, input_length, output_length, cache_update_index
+         )
+
+         return (
+             ops.minimum(decoder_mask, causal_mask)
+             if decoder_mask is not None
+             else causal_mask
+         )
+
+     def compute_output_shape(self, decoder_sequence_shape):
+         """Computes the output shape of the layer.
+
+         Args:
+             decoder_sequence_shape: Shape of the decoder sequence input.
+
+         Returns:
+             Output shape, which is the same as the input shape.
+         """
+         return decoder_sequence_shape
+
+     def get_config(self):
+         """Returns the config of the layer.
+
+         Returns:
+             Dictionary containing the parameters used to initialize this layer.
+         """
+         config = super().get_config()
+         config.update(
+             {
+                 "intermediate_dim": self.intermediate_dim,
+                 "num_query_heads": self.num_query_heads,
+                 "rope_max_wavelength": self.rope_max_wavelength,
+                 "rope_scaling_factor": self.rope_scaling_factor,
+                 "num_key_value_heads": self.num_key_value_heads,
+                 "activation": keras.activations.serialize(self.activation),
+                 "layer_norm_epsilon": self.layer_norm_epsilon,
+                 "kernel_initializer": keras.initializers.serialize(
+                     self.kernel_initializer
+                 ),
+                 "dropout": self.dropout,
+             }
+         )
+         return config
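Note: judging by the RECORD entries later in this diff, the file added above is keras_hub/src/models/qwen/qwen_decoder.py. As a quick orientation, here is a minimal, hypothetical smoke test of the layer on its own; the constructor arguments mirror the diff, but the batch size, sequence length, and hidden width (2, 16, 64) are illustrative values of my choosing, not anything this release prescribes.

# Hypothetical smoke test for QwenTransformerDecoder; shapes are arbitrary.
import keras

from keras_hub.src.models.qwen.qwen_decoder import QwenTransformerDecoder

decoder = QwenTransformerDecoder(
    intermediate_dim=128,   # width of the gated feed-forward network
    num_query_heads=4,
    num_key_value_heads=2,  # GQA: fewer key/value heads than query heads
)

x = keras.random.normal((2, 16, 64))  # [batch, sequence, hidden]
y = decoder(x)
print(y.shape)  # expected: (2, 16, 64) -- output shape equals input shape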
@@ -0,0 +1,32 @@
+ import keras
+ from keras import ops
+
+
+ class QwenLayerNorm(keras.layers.Layer):
+     """A normalization layer for Qwen that implements RMS normalization."""
+
+     def __init__(self, epsilon=1e-6, **kwargs):
+         super().__init__(**kwargs)
+         self.epsilon = epsilon
+
+     def build(self, input_shape):
+         dim = input_shape[-1]
+         self.scale = self.add_weight(
+             name="scale",
+             trainable=True,
+             shape=(dim,),
+             initializer="ones",
+             dtype=self.variable_dtype,
+         )
+         self.built = True
+
+     def call(self, x):
+         x = ops.cast(x, "float32")
+         var = ops.mean(ops.power(x, 2), axis=-1, keepdims=True)
+         x = x * ops.rsqrt(var + self.epsilon)
+         return ops.cast(x * self.scale, self.compute_dtype)
+
+     def get_config(self):
+         config = super().get_config()
+         config.update({"epsilon": self.epsilon})
+         return config
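For reference, the `call` method above is plain RMS normalization. A minimal NumPy restatement of the same computation (mine, not part of the package) can be handy for checking a ported checkpoint by hand:

# NumPy sketch of the RMS normalization performed in QwenLayerNorm.call:
# y = x / sqrt(mean(x**2, axis=-1) + epsilon) * scale
import numpy as np

def rms_norm(x, scale, epsilon=1e-6):
    x = x.astype("float32")
    var = np.mean(np.power(x, 2), axis=-1, keepdims=True)
    return x / np.sqrt(var + epsilon) * scale

x = np.random.randn(2, 8).astype("float32")
print(rms_norm(x, np.ones(8, dtype="float32")).shape)  # (2, 8)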
@@ -0,0 +1,51 @@
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone
+ from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer
+
+
+ @keras_hub_export(
+     [
+         "keras_hub.tokenizers.QwenTokenizer",
+         "keras_hub.tokenizers.Qwen2Tokenizer",
+         "keras_hub.models.QwenTokenizer",
+         "keras_hub.models.Qwen2Tokenizer",
+     ]
+ )
+ class QwenTokenizer(BytePairTokenizer):
+     """Tokenizer for Qwen models.
+
+     This tokenizer implements byte-pair encoding (BPE) for Qwen models,
+     handling special tokens like BOS (beginning of sequence) and EOS (end of
+     sequence).
+
+     Args:
+         vocabulary: Dictionary mapping tokens to token IDs, or path to
+             vocabulary file.
+         merges: List of BPE merges, or path to merges file.
+         bos_token: Beginning of sequence token. Defaults to None.
+         eos_token: End of sequence token. Defaults to "<|endoftext|>".
+         misc_special_tokens: Set of additional special tokens. Defaults to
+             empty set.
+     """
+
+     backbone_cls = QwenBackbone
+
+     def __init__(
+         self,
+         vocabulary=None,
+         merges=None,
+         **kwargs,
+     ):
+         # Add EOS token
+         eos_token = "<|endoftext|>"
+         self._add_special_token(eos_token, "end_token")
+
+         self.start_token_id = None
+         self.start_token = None
+         self.pad_token_id = 0
+
+         super().__init__(
+             vocabulary=vocabulary,
+             merges=merges,
+             **kwargs,
+         )
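The RECORD section below lists this file as keras_hub/src/models/qwen/qwen_tokenizer.py. A hedged construction example follows; the toy vocabulary and merge list are invented for illustration (a real Qwen checkpoint ships its own tokenizer.json), so the printed ids are only what I would expect from the code above.

# Toy, hypothetical example; real usage would load vocabulary/merges from a preset.
from keras_hub.src.models.qwen.qwen_tokenizer import QwenTokenizer

vocab = {"<|endoftext|>": 0, "a": 1, "b": 2, "ab": 3}
merges = ["a b"]  # single BPE merge rule: "a" + "b" -> "ab"

tokenizer = QwenTokenizer(vocabulary=vocab, merges=merges)
print(tokenizer("ab"))         # expected token ids: [3]
print(tokenizer.end_token_id)  # id of "<|endoftext|>", i.e. 0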
@@ -0,0 +1,148 @@
+ import numpy as np
+
+ from keras_hub.src.models.qwen.qwen_backbone import QwenBackbone
+ from keras_hub.src.utils.preset_utils import load_json
+
+ backbone_cls = QwenBackbone
+
+
+ def convert_backbone_config(transformers_config):
+     return {
+         "vocabulary_size": transformers_config["vocab_size"],
+         "hidden_dim": transformers_config["hidden_size"],
+         "num_layers": transformers_config["num_hidden_layers"],
+         "num_query_heads": transformers_config["num_attention_heads"],
+         "num_key_value_heads": transformers_config["num_key_value_heads"],
+         "intermediate_dim": transformers_config["intermediate_size"],
+         "layer_norm_epsilon": transformers_config["rms_norm_eps"],
+         "rope_max_wavelength": transformers_config["rope_theta"],
+         "use_sliding_window": transformers_config["use_sliding_window"],
+         "sliding_window_size": transformers_config["sliding_window"],
+     }
+
+
+ def convert_weights(backbone, loader, transformers_config):
+     loader.port_weight(
+         keras_variable=backbone.get_layer("token_embedding").embeddings,
+         hf_weight_key="model.embed_tokens.weight",
+     )
+     if not backbone.tie_word_embeddings:
+         loader.port_weight(
+             keras_variable=backbone.get_layer(
+                 "token_embedding"
+             ).reverse_embeddings,
+             hf_weight_key="lm_head.weight",
+             # rearrange_pattern="b a -> a b",
+             hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+         )
+
+     def transpose_and_reshape(x, shape):
+         return np.reshape(np.transpose(x), shape)
+
+     for i in range(backbone.num_layers):
+         decoder_layer = backbone.get_layer(f"transformer_layer_{i}")
+
+         # Input layernorm
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layernorm.scale,
+             hf_weight_key=f"model.layers.{i}.input_layernorm.weight",
+         )
+
+         # Attention layers
+
+         ## Query
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._query_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.self_attn.q_proj.weight",
+             hook_fn=transpose_and_reshape,
+         )
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._query_dense.bias,
+             hf_weight_key=f"model.layers.{i}.self_attn.q_proj.bias",
+             hook_fn=transpose_and_reshape,
+         )
+         ## Key
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._key_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.self_attn.k_proj.weight",
+             hook_fn=transpose_and_reshape,
+         )
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._key_dense.bias,
+             hf_weight_key=f"model.layers.{i}.self_attn.k_proj.bias",
+             hook_fn=transpose_and_reshape,
+         )
+         ## Value
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._value_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.self_attn.v_proj.weight",
+             hook_fn=transpose_and_reshape,
+         )
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._value_dense.bias,
+             hf_weight_key=f"model.layers.{i}.self_attn.v_proj.bias",
+             hook_fn=transpose_and_reshape,
+         )
+         ## Output
+         loader.port_weight(
+             keras_variable=decoder_layer._self_attention_layer._output_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.self_attn.o_proj.weight",
+             # rearrange_patterns="c (a b) -> a b c",
+             # rearrange_dims={"a": backbone.num_query_heads},
+             hook_fn=transpose_and_reshape,
+         )
+
+         # MLP layers
+         loader.port_weight(
+             keras_variable=decoder_layer._feedforward_intermediate_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.mlp.up_proj.weight",
+             # rearrange_patterns="b a -> a b",
+             hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+         )
+         loader.port_weight(
+             keras_variable=decoder_layer._feedforward_output_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.mlp.down_proj.weight",
+             # rearrange_patterns="b a -> a b",
+             hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+         )
+         loader.port_weight(
+             keras_variable=decoder_layer._feedforward_gate_dense.kernel,
+             hf_weight_key=f"model.layers.{i}.mlp.gate_proj.weight",
+             # rearrange_patterns="b a -> a b",
+             hook_fn=lambda hf_tensor, _: np.transpose(hf_tensor, axes=(1, 0)),
+         )
+
+         # Feedforward layernorm
+         loader.port_weight(
+             keras_variable=decoder_layer._feedforward_layernorm.scale,
+             hf_weight_key=f"model.layers.{i}.post_attention_layernorm.weight",
+         )
+
+     # Final normalization layer
+     loader.port_weight(
+         keras_variable=backbone.get_layer("sequence_output_layernorm").scale,
+         hf_weight_key="model.norm.weight",
+     )
+
+     return backbone
+
+
+ def convert_tokenizer(cls, preset, **kwargs):
+     tokenizer_config = load_json(preset, "tokenizer.json")
+     vocab = tokenizer_config["model"]["vocab"]
+     merges = tokenizer_config["model"]["merges"]
+
+     # Load all special tokens with the exception of "reserved" ones.
+     special_tokens = set()
+     for token in tokenizer_config["added_tokens"]:
+         if not token["content"].startswith("<|reserved_special_token_"):
+             vocab[token["content"]] = token["id"]
+             special_tokens.add(token["content"])
+
+     kwargs.update(
+         {
+             "unsplittable_tokens": list(special_tokens),
+         }
+     )
+
+     return cls(vocabulary=vocab, merges=merges, **kwargs)
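This converter (listed in RECORD as keras_hub/src/utils/transformers/convert_qwen.py) maps a Hugging Face config.json onto QwenBackbone constructor arguments and ports the safetensors weights layer by layer. A small sketch of the config mapping, using a down-scaled config dict I made up purely to show the key renaming:

# Illustrative only: a miniature dict in the shape of a HF Qwen2 config.json.
from keras_hub.src.utils.transformers.convert_qwen import (
    convert_backbone_config,
)

hf_config = {
    "vocab_size": 1000,
    "hidden_size": 64,
    "num_hidden_layers": 2,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "intermediate_size": 128,
    "rms_norm_eps": 1e-6,
    "rope_theta": 10000.0,
    "use_sliding_window": False,
    "sliding_window": 4096,
}
print(convert_backbone_config(hf_config))
# expected: {"vocabulary_size": 1000, "hidden_dim": 64, "num_layers": 2, ...}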
@@ -12,6 +12,7 @@ from keras_hub.src.utils.transformers import convert_gpt2
  from keras_hub.src.utils.transformers import convert_llama3
  from keras_hub.src.utils.transformers import convert_mistral
  from keras_hub.src.utils.transformers import convert_pali_gemma
+ from keras_hub.src.utils.transformers import convert_qwen
  from keras_hub.src.utils.transformers import convert_vit
  from keras_hub.src.utils.transformers.safetensor_utils import SafetensorLoader

@@ -41,6 +42,8 @@ class TransformersPresetLoader(PresetLoader):
              self.converter = convert_pali_gemma
          elif model_type == "vit":
              self.converter = convert_vit
+         elif model_type == "qwen2":
+             self.converter = convert_qwen
          else:
              raise ValueError(
                  "KerasHub has no converter for huggingface/transformers models "
@@ -1,7 +1,7 @@
  from keras_hub.src.api_export import keras_hub_export

  # Unique source of truth for the version number.
- __version__ = "0.20.0.dev202503170356"
+ __version__ = "0.20.0.dev202503180354"


  @keras_hub_export("keras_hub.version")
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: keras-hub-nightly
- Version: 0.20.0.dev202503170356
+ Version: 0.20.0.dev202503180354
  Summary: Industry-strength Natural Language Processing extensions for Keras.
  Home-page: https://github.com/keras-team/keras-hub
  Author: Keras team
@@ -2,13 +2,13 @@ keras_hub/__init__.py,sha256=QGdXyHgYt6cMUAP1ebxwc6oR86dE0dkMxNy2eOCQtFo,855
  keras_hub/api/__init__.py,sha256=EzR6D-XWsm_gDrX5LDwKEmrah_gu3ffpj8GKBudE0yI,485
  keras_hub/api/layers/__init__.py,sha256=-yHyqsjWBhmFv9RSS2cMyPcieU1RkNzcNsq9IDXSVFE,3626
  keras_hub/api/metrics/__init__.py,sha256=So8Ec-lOcTzn_UUMmAdzDm8RKkPu2dbRUm2px8gpUEI,381
- keras_hub/api/models/__init__.py,sha256=vtOXBt8YNXvaDrfpKRpJ6MXjU5FzMv2a44Db_P8cGUg,17606
+ keras_hub/api/models/__init__.py,sha256=U9LffuV0XchcdCWxl-I8qaOvYJ0bwdfq-6O_CTbb9Qc,18310
  keras_hub/api/samplers/__init__.py,sha256=n-_SEXxr2LNUzK2FqVFN7alsrkx1P_HOVTeLZKeGCdE,730
- keras_hub/api/tokenizers/__init__.py,sha256=lhvIqP8xqdkjmKSEBujHNxh5Tk5A3T0I7AUuMmKzx00,2597
+ keras_hub/api/tokenizers/__init__.py,sha256=LsVLrAxTVe9YT9ixsGYnbtWuyfPW5-FW3Wt3xV08_LE,2758
  keras_hub/api/utils/__init__.py,sha256=Gp1E6gG-RtKQS3PBEQEOz9PQvXkXaJ0ySGMqZ7myN7A,215
  keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
- keras_hub/src/version_utils.py,sha256=2VuoNZma825YX8T8OG2oe63hCK5X0pXyT0-5xLepnJQ,222
+ keras_hub/src/version_utils.py,sha256=incPTPm_OMuYw4E1pHpSIS3-f26vWZoaY7Bys-vEw7E,222
  keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -29,7 +29,7 @@ keras_hub/src/layers/modeling/transformer_encoder.py,sha256=Qe19_aR6w4PTFbzvBmSP
  keras_hub/src/layers/modeling/transformer_layer_utils.py,sha256=FuznrW33iG50B-VDN8R1RjuA5JG72yNMJ1TBgWLxR0E,3487
  keras_hub/src/layers/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/layers/preprocessing/audio_converter.py,sha256=YGh_kQw65a1Z6S5zzSNVP-ChyLYHq3-eOYpOS53xIN8,4156
- keras_hub/src/layers/preprocessing/image_converter.py,sha256=ILhb8sPusjdegKbc60PVdi7RbwNtqyv-Jm50tSUs6RI,15408
+ keras_hub/src/layers/preprocessing/image_converter.py,sha256=E5EYbURAVNntnx0eQ886QOkOPAOc2TKrvZhe39SL5sU,15552
  keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py,sha256=itxWq3FHYlR0I7jKarQlSKbSmRLl9ut_UTSP3ZDwP0A,8162
  keras_hub/src/layers/preprocessing/multi_segment_packer.py,sha256=ZNqnUFnc9Af122Q7T6YyUoXgIdU9AgIJfsvR1UrCjFU,12068
  keras_hub/src/layers/preprocessing/preprocessing_layer.py,sha256=WyX41b9Ev_YJ5uVQVOAqD0PQasMOPDoyDjl_PkzkAkE,687
@@ -271,6 +271,14 @@ keras_hub/src/models/phi3/phi3_layernorm.py,sha256=Oqu81tGd97Lzx3kG1QEtZ0S6gbfn3
  keras_hub/src/models/phi3/phi3_presets.py,sha256=sb2ce7Gq1OikFEf2KIYG69rFKHYKj8qhlN-Ea8d6J7k,1366
  keras_hub/src/models/phi3/phi3_rotary_embedding.py,sha256=wqiRn8nETNcLc5Vsm_d_8s11Ro6ibWZbWvODdLqIOo4,5013
  keras_hub/src/models/phi3/phi3_tokenizer.py,sha256=bOPH14wTVVHJHq8mgzXLjsgvKMNhfO8eayevAPpjYVA,1992
+ keras_hub/src/models/qwen/__init__.py,sha256=hskG3tZUY_AYZPp0WVzbCtw37AIYENyp3DOnqHmdRBw,65
+ keras_hub/src/models/qwen/qwen_attention.py,sha256=vBPGdNMRnfuETxxdwDzwpObOvt3zB2qqc9kbWRRKuQg,12951
+ keras_hub/src/models/qwen/qwen_backbone.py,sha256=xBu2zEzFFAjKgaHOqPnxLU-j4oL3N2G2KT-uwL2zEM0,13018
+ keras_hub/src/models/qwen/qwen_causal_lm.py,sha256=_f-UHaKHp0ncxknpkpEJiW3jlng3E4CmddjQfz2QzJo,12249
+ keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py,sha256=Va-4TLJD3ycEnkS41rF3dVj4_6K0j-gxLTrREFRcyr0,609
+ keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSwOARG0ote-jAg,11771
+ keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
+ keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
  keras_hub/src/models/resnet/__init__.py,sha256=C5UqlQ6apm8WSp1bnrxB6Bi3BGaknxRQs-r3b2wpaGA,257
  keras_hub/src/models/resnet/resnet_backbone.py,sha256=Q7nlqcTXZzjqd0e-DsjHC4ok58yOX7qxseotym3uZpM,31276
  keras_hub/src/models/resnet/resnet_image_classifier.py,sha256=nf35EKDzvBkfhHsK-s6Ks0nbhvKO7HEOYZm94YckyWE,510
@@ -424,10 +432,11 @@ keras_hub/src/utils/transformers/convert_gpt2.py,sha256=HCeHN_-GiQJRxLCM9OCJJ1wa
  keras_hub/src/utils/transformers/convert_llama3.py,sha256=zlg0yFscjytyOFymDwqnbuXkmYvb88qqYzAROKcpaPU,5250
  keras_hub/src/utils/transformers/convert_mistral.py,sha256=kVhN9h1ZFVhwkNW8p3wnS7eANJUXIsNy1RxWXy20Gqw,4760
  keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=B1leeDw96Yvu81hYumf66hIid07k5NLqoeWAJgPnaLs,10649
+ keras_hub/src/utils/transformers/convert_qwen.py,sha256=I2bfwo8AQd_JfwFpiAuCQ3k_FC66J5lY7tYt99yMc9E,5811
  keras_hub/src/utils/transformers/convert_vit.py,sha256=9SUZ9utNJhW_5cj3acMn9cRy47u2eIcDsrhmzj77o9k,5187
- keras_hub/src/utils/transformers/preset_loader.py,sha256=DgGJXbTSB9Na8FIR-YWWVqQPOFxHwWrGm41EwcS_EFs,3797
+ keras_hub/src/utils/transformers/preset_loader.py,sha256=0Hi7R8HnATcwFVLsJwMMIMWTCXHNfep4IPiRpQXqM-w,3933
  keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
- keras_hub_nightly-0.20.0.dev202503170356.dist-info/METADATA,sha256=Yt3QbpFldQbmp0Yu8fw1w0q1Z6zKh8WeqbRjWcAOTOM,7715
- keras_hub_nightly-0.20.0.dev202503170356.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
- keras_hub_nightly-0.20.0.dev202503170356.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
- keras_hub_nightly-0.20.0.dev202503170356.dist-info/RECORD,,
+ keras_hub_nightly-0.20.0.dev202503180354.dist-info/METADATA,sha256=J801c04q7y8QuruHO3nIl4eGkEGz6EkldpjG82IhZ3w,7715
+ keras_hub_nightly-0.20.0.dev202503180354.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+ keras_hub_nightly-0.20.0.dev202503180354.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+ keras_hub_nightly-0.20.0.dev202503180354.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (76.0.0)
+ Generator: setuptools (76.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any