keras-hub-nightly 0.22.0.dev202507100418__py3-none-any.whl → 0.22.0.dev202507120419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keras_hub/src/models/hgnetv2/hgnetv2_presets.py ADDED
@@ -0,0 +1,58 @@
+ # Metadata for loading pretrained model weights.
+ backbone_presets = {
+     "hgnetv2_b4_ssld_stage2_ft_in1k": {
+         "metadata": {
+             "description": (
+                 "HGNetV2 B4 model with 2-stage SSLD training, fine-tuned on "
+                 "ImageNet-1K."
+             ),
+             "params": 13599072,
+             "path": "hgnetv2",
+         },
+         "kaggle_handle": "",
+     },
+     "hgnetv2_b5_ssld_stage1_in22k_in1k": {
+         "metadata": {
+             "description": (
+                 "HGNetV2 B5 model with 1-stage SSLD training, pre-trained on "
+                 "ImageNet-22K and fine-tuned on ImageNet-1K."
+             ),
+             "params": 33419680,
+             "path": "hgnetv2",
+         },
+         "kaggle_handle": "",
+     },
+     "hgnetv2_b5_ssld_stage2_ft_in1k": {
+         "metadata": {
+             "description": (
+                 "HGNetV2 B5 model with 2-stage SSLD training, fine-tuned on "
+                 "ImageNet-1K."
+             ),
+             "params": 33419680,
+             "path": "hgnetv2",
+         },
+         "kaggle_handle": "",
+     },
+     "hgnetv2_b6_ssld_stage1_in22k_in1k": {
+         "metadata": {
+             "description": (
+                 "HGNetV2 B6 model with 1-stage SSLD training, pre-trained on "
+                 "ImageNet-22K and fine-tuned on ImageNet-1K."
+             ),
+             "params": 69179888,
+             "path": "hgnetv2",
+         },
+         "kaggle_handle": "",
+     },
+     "hgnetv2_b6_ssld_stage2_ft_in1k": {
+         "metadata": {
+             "description": (
+                 "HGNetV2 B6 model with 2-stage SSLD training, fine-tuned on "
+                 "ImageNet-1K."
+             ),
+             "params": 69179888,
+             "path": "hgnetv2",
+         },
+         "kaggle_handle": "",
+     },
+ }
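Note that every `kaggle_handle` above is still empty in this nightly, so these presets cannot be fetched yet; the dict itself is plain data, though. A minimal sketch of inspecting it (import path taken from the RECORD changes below):

```python
# Minimal sketch: inspect the HGNetV2 preset metadata defined above.
# No weights are involved; the empty `kaggle_handle` is left as-is.
from keras_hub.src.models.hgnetv2.hgnetv2_presets import backbone_presets

preset = backbone_presets["hgnetv2_b4_ssld_stage2_ft_in1k"]
print(preset["metadata"]["description"])
print(preset["metadata"]["params"])  # 13599072
```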
keras_hub/src/models/qwen3/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone
+ from keras_hub.src.models.qwen3.qwen3_presets import backbone_presets
+ from keras_hub.src.utils.preset_utils import register_presets
+
+ register_presets(backbone_presets, Qwen3Backbone)
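This new `__init__.py` is what wires the Qwen3 presets (defined later in this diff) into the preset registry. A minimal sketch of the user-facing effect, assuming the standard KerasHub `from_preset()` flow:

```python
# Minimal sketch, assuming standard KerasHub preset resolution: once
# register_presets(backbone_presets, Qwen3Backbone) has run on import,
# preset names resolve by string for the registered class.
import keras_hub

backbone = keras_hub.models.Qwen3Backbone.from_preset("qwen3_0.6b_en")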
keras_hub/src/models/qwen3/qwen3_attention.py CHANGED
@@ -303,7 +303,7 @@ class Qwen3Attention(keras.layers.Layer):
      attention_mask = self._mask_sliding_window(
          attention_mask,
          cache_update_index=cache_update_index
-         if cache_update_index
+         if cache_update_index is not None
          else 0,
      )
      attention_scores = self._masked_softmax(
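The fix above is subtle: at the first decode step the cache index is 0, and a bare truthiness test cannot tell a valid index of 0 apart from `None`; worse, a traced tensor index cannot be evaluated for truth at all. A minimal illustration of the difference:

```python
# Minimal illustration of the `is not None` fix above.
def old_check(cache_update_index):
    # Truthiness: treats 0, zero-valued 0-d tensors, and None alike,
    # and calling bool() on a symbolic tensor fails during tracing.
    return cache_update_index if cache_update_index else 0

def new_check(cache_update_index):
    # Only the missing-value sentinel falls back to 0; a real index
    # (including a tensor-valued 0) passes through untouched.
    return cache_update_index if cache_update_index is not None else 0

assert old_check(None) == 0 and new_check(None) == 0
assert old_check(7) == 7 and new_check(7) == 7
```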
keras_hub/src/models/qwen3/qwen3_causal_lm.py ADDED
@@ -0,0 +1,390 @@
+ import keras
+ from keras import ops
+
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.causal_lm import CausalLM
+ from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone
+ from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
+     Qwen3CausalLMPreprocessor,
+ )
+ from keras_hub.src.utils.tensor_utils import any_equal
+
+
+ @keras_hub_export("keras_hub.models.Qwen3CausalLM")
+ class Qwen3CausalLM(CausalLM):
+     """An end-to-end Qwen3 model for causal language modeling.
+
+     A causal language model (LM) predicts the next token based on previous
+     tokens. This task setup can be used to train the model unsupervised on
+     plain text input, or to autoregressively generate plain text similar to
+     the data used for training. This task can be used for pre-training or
+     fine-tuning a Qwen3 model, simply by calling `fit()`.
+
+     This model has a `generate()` method, which generates text based on a
+     prompt. The generation strategy used is controlled by an additional
+     `sampler` argument on `compile()`. You can recompile the model with
+     different `keras_hub.samplers` objects to control the generation. By
+     default, `"greedy"` sampling will be used.
+
+     This model can optionally be configured with a `preprocessor` layer, in
+     which case it will automatically apply preprocessing to string inputs
+     during `fit()`, `predict()`, `evaluate()`, and `generate()`. This is
+     done by default when creating the model with `from_preset()`.
+
+     Args:
+         backbone: A `keras_hub.models.Qwen3Backbone` instance.
+         preprocessor: A `keras_hub.models.Qwen3CausalLMPreprocessor` or
+             `None`. If `None`, this model will not apply preprocessing, and
+             inputs should be preprocessed before calling the model.
+
+     Examples:
+
+     Use `generate()` to do text generation.
+     ```python
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset("qwen3_0.6b_en")
+     qwen3_lm.generate("I want to say", max_length=30)
+
+     # Generate with batched prompts.
+     qwen3_lm.generate(["This is a", "Where are you"], max_length=30)
+     ```
+
+     Compile the `generate()` function with a custom sampler.
+     ```python
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset("qwen3_0.6b_en")
+     qwen3_lm.compile(sampler="top_k")
+     qwen3_lm.generate("I want to say", max_length=30)
+
+     qwen3_lm.compile(sampler=keras_hub.samplers.BeamSampler(num_beams=2))
+     qwen3_lm.generate("I want to say", max_length=30)
+     ```
+
+     Use `generate()` without preprocessing.
+     ```python
+     prompt = {
+         # Token ids for "<bos> Qwen3 is".
+         "token_ids": np.array([[2, 12345, 678, 0, 0, 0, 0]] * 2),
+         # Use `"padding_mask"` to indicate values that should not be
+         # overridden.
+         "padding_mask": np.array([[1, 1, 1, 0, 0, 0, 0]] * 2),
+     }
+
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset(
+         "qwen3_0.6b_en",
+         preprocessor=None,
+     )
+     qwen3_lm.generate(prompt)
+     ```
+
+     Call `fit()` on a single batch.
+     ```python
+     features = ["The quick brown fox jumped.", "I forgot my homework."]
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset("qwen3_0.6b_en")
+     qwen3_lm.fit(x=features, batch_size=2)
+     ```
+
+     Call `fit()` with LoRA fine-tuning enabled.
+     ```python
+     features = ["The quick brown fox jumped.", "I forgot my homework."]
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset("qwen3_0.6b_en")
+     qwen3_lm.backbone.enable_lora(rank=4)
+     qwen3_lm.fit(x=features, batch_size=2)
+     ```
+
+     Call `fit()` without preprocessing.
+     ```python
+     x = {
+         # Token ids for "<bos> Qwen3 is a language model<eos>"
+         "token_ids": np.array([[2, 12345, 678, 543, 9876, 1, 0, 0]] * 2),
+         "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 0, 0]] * 2),
+     }
+     y = np.array([[12345, 678, 543, 9876, 1, 0, 0, 0]] * 2)
+     sw = np.array([[1, 1, 1, 1, 1, 0, 0, 0]] * 2)
+
+     qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset(
+         "qwen3_0.6b_en",
+         preprocessor=None,
+     )
+     qwen3_lm.fit(x=x, y=y, sample_weight=sw, batch_size=2)
+     ```
+
+     Custom backbone and vocabulary.
+     ```python
+     features = ["The quick brown fox jumped.", "I forgot my homework."]
+     tokenizer = keras_hub.models.Qwen3Tokenizer(
+         vocabulary="vocab.json",
+         merges="merges.txt",
+     )
+     preprocessor = keras_hub.models.Qwen3CausalLMPreprocessor(
+         tokenizer=tokenizer,
+         sequence_length=128,
+     )
+     backbone = keras_hub.models.Qwen3Backbone(
+         vocabulary_size=151936,
+         num_layers=28,
+         num_query_heads=16,
+         num_key_value_heads=8,
+         head_dim=128,
+         hidden_dim=2048,
+         intermediate_dim=4096,
+     )
+     qwen3_lm = keras_hub.models.Qwen3CausalLM(
+         backbone=backbone,
+         preprocessor=preprocessor,
+     )
+     qwen3_lm.fit(x=features, batch_size=2)
+     ```
+     """
+
+     backbone_cls = Qwen3Backbone
+     preprocessor_cls = Qwen3CausalLMPreprocessor
+
+     def __init__(self, backbone, preprocessor=None, **kwargs):
+         # === Layers ===
+         self.backbone = backbone
+         self.preprocessor = preprocessor
+
+         # === Functional Model ===
+         # This must be "backbone.input" i.e. the full input structure,
+         # rather than "backbone.inputs" which is the flattened list of
+         # inputs.
+         inputs = backbone.input
+         hidden_states = backbone(inputs)
+         outputs = backbone.token_embedding(hidden_states, reverse=True)
+         super().__init__(
+             inputs=inputs,
+             outputs=outputs,
+             **kwargs,
+         )
+
+     def call_with_cache(
+         self,
+         token_ids,
+         cache,
+         cache_update_index,
+     ):
+         """Forward pass of `Qwen3CausalLM` with cache.
+
+         `call_with_cache` adds an additional forward pass for the model for
+         autoregressive inference. Unlike calling the model directly, this
+         method allows caching previous key/value tensors in the multi-head
+         attention layers, and avoids recomputing the outputs of seen tokens.
+
+         Args:
+             token_ids: a dense int Tensor with shape `(batch_size,
+                 max_length)`.
+             cache: a dense float Tensor, the cache of key and value.
+             cache_update_index: int, or int Tensor. The index of current
+                 inputs in the whole sequence.
+
+         Returns:
+             A (logits, hidden_states, cache) tuple, where `logits` is the
+             language model logits for the input token_ids, `hidden_states`
+             is the final hidden representation of the input tokens, and
+             `cache` is the decoding cache.
+         """
+         x = self.backbone.token_embedding(token_ids)
+         # Each decoder layer has a cache; we update them separately.
+         updated_cache = []
+         for i in range(self.backbone.num_layers):
+             current_cache = cache[:, i, ...]
+             x, next_cache = self.backbone.transformer_layers[i](
+                 x,
+                 self_attention_cache=current_cache,
+                 self_attention_cache_update_index=cache_update_index,
+             )
+             updated_cache.append(next_cache)
+         cache = ops.stack(updated_cache, axis=1)
+         hidden_states = x = self.backbone.layer_norm(x)
+         logits = self.backbone.token_embedding(x, reverse=True)
+         return logits, hidden_states, cache
+
+     def _build_cache(self, token_ids):
+         """Build an empty cache for use with `call_with_cache()`."""
+         batch_size = ops.shape(token_ids)[0]
+         max_length = ops.shape(token_ids)[1]
+         num_layers = self.backbone.num_layers
+         num_key_value_heads = self.backbone.num_key_value_heads
+         head_dim = self.backbone.head_dim
+         shape = [
+             batch_size,
+             num_layers,
+             2,
+             max_length,
+             num_key_value_heads,
+             head_dim,
+         ]
+         cache = ops.zeros(shape, dtype=self.compute_dtype)
+         # Seed the cache.
+         _, hidden_states, cache = self.call_with_cache(token_ids, cache, 0)
+         return hidden_states, cache
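The cache shape built in `_build_cache()` makes its memory footprint easy to estimate. A back-of-the-envelope sketch; `num_layers=28` matches the `qwen3_0.6b_en` description later in this diff, while the key-value head count and head dimension are assumed values for illustration:

```python
# Back-of-the-envelope size of the cache shape above:
# (batch_size, num_layers, 2 [key, value], max_length,
#  num_key_value_heads, head_dim).
batch_size, num_layers, max_length = 1, 28, 1024
num_key_value_heads, head_dim = 8, 128  # assumed, for illustration

entries = (
    batch_size * num_layers * 2 * max_length * num_key_value_heads * head_dim
)
print(f"{entries * 2 / 1024**2:.0f} MiB at 2 bytes per entry")  # ~112 MiB
```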
+
+     def generate_step(
+         self,
+         inputs,
+         stop_token_ids=None,
+     ):
+         """A compilable generation function for a single batch of inputs.
+
+         This function represents the inner, XLA-compilable generation
+         function for a single batch of inputs. Inputs should have the same
+         structure as model inputs, a dictionary with keys `"token_ids"` and
+         `"padding_mask"`.
+
+         Args:
+             inputs: A dictionary with two keys `"token_ids"` and
+                 `"padding_mask"` and batched tensor values.
+             stop_token_ids: Tuple of ids of stop tokens. If all sequences
+                 have produced a new stop token, generation will stop.
+         """
+         token_ids, padding_mask = inputs["token_ids"], inputs["padding_mask"]
+         # Create and seed cache with a single forward pass.
+         hidden_states, cache = self._build_cache(token_ids)
+         # Compute the lengths of all user-inputted token ids.
+         row_lengths = ops.sum(ops.cast(padding_mask, "int32"), axis=-1)
+         # Start at the first index that has no user-inputted id.
+         index = ops.min(row_lengths)
+
+         def next(prompt, cache, index):
+             # The cache index is the index of our previous token.
+             cache_update_index = index - 1
+             batch_size = ops.shape(prompt)[0]
+             prompt = ops.slice(prompt, [0, cache_update_index], [batch_size, 1])
+             logits, hidden_states, cache = self.call_with_cache(
+                 prompt,
+                 cache,
+                 cache_update_index,
+             )
+             return (
+                 ops.squeeze(logits, axis=1),
+                 ops.squeeze(hidden_states, axis=1),
+                 cache,
+             )
+
+         token_ids = self.sampler(
+             next=next,
+             prompt=token_ids,
+             cache=cache,
+             index=index,
+             mask=padding_mask,
+             stop_token_ids=stop_token_ids,
+             hidden_states=hidden_states,
+             model=self,
+         )
+
+         # Compute an output padding mask with the token ids we updated.
+         if stop_token_ids is not None:
+             # Build a mask of stop token locations not in the original
+             # prompt (not in locations where `padding_mask` is True).
+             end_locations = any_equal(
+                 token_ids, stop_token_ids, ops.logical_not(padding_mask)
+             )
+             end_locations = ops.cast(end_locations, "int32")
+             # Use cumsum to get ones in all locations after end_locations.
+             cumsum = ops.cast(ops.cumsum(end_locations, axis=-1), "int32")
+             overflow = cumsum - end_locations
+             # Our padding mask is the inverse of these overflow locations.
+             padding_mask = ops.logical_not(ops.cast(overflow, "bool"))
+         else:
+             # Without early stopping, all locations will have been updated.
+             padding_mask = ops.ones_like(token_ids, dtype="bool")
+         return {
+             "token_ids": token_ids,
+             "padding_mask": padding_mask,
+         }
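The cumsum trick in the stop-token branch above deserves a worked example: `cumsum - end_locations` is nonzero only strictly after the first generated stop token, so the stop token itself stays inside the mask. A numpy rendering of the same arithmetic (token id 99 and all values are made up):

```python
import numpy as np

# Numpy rendering of the stop-token masking above; values are made up.
token_ids = np.array([[5, 6, 7, 8, 99, 3, 4]])  # 99 is the stop token
prompt_mask = np.array([[1, 1, 1, 0, 0, 0, 0]], dtype=bool)

# Stop tokens that were generated, i.e. not part of the prompt.
end_locations = ((token_ids == 99) & ~prompt_mask).astype("int32")
# Nonzero only strictly after a stop token.
overflow = np.cumsum(end_locations, axis=-1) - end_locations
padding_mask = ~overflow.astype(bool)
print(padding_mask)  # [[ True  True  True  True  True False False]]
```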
+
+     def score(
+         self,
+         token_ids,
+         padding_mask=None,
+         scoring_mode="logits",
+         layer_intercept_fn=None,
+         target_ids=None,
+     ):
+         """Score a generation represented by the provided token ids.
+
+         Args:
+             token_ids: A <int>[batch_size, num_tokens] tensor containing
+                 tokens to score. Typically, this tensor captures the output
+                 from a call to `Qwen3CausalLM.generate()`, i.e., tokens for
+                 both the input text and the model-generated text.
+             padding_mask: A <bool>[batch_size, num_tokens] tensor indicating
+                 the tokens that should be preserved during generation. This
+                 is an artifact required by the `Qwen3Backbone` and isn't
+                 influential on the computation of this function. If omitted,
+                 this function uses `keras.ops.ones()` to create a tensor of
+                 the appropriate shape.
+             scoring_mode: The type of scores to return, either "logits" or
+                 "loss", both will be per input token.
+             layer_intercept_fn: An optional function for augmenting
+                 activations with additional computation, for example, as
+                 part of interpretability research. This function will be
+                 passed the activations as its first parameter and a numeric
+                 index associated with that backbone layer. This index is
+                 _not_ an index into `self.backbone.layers`. The index -1
+                 accompanies the embeddings returned by calling
+                 `self.backbone.token_embedding()` on `token_ids` in the
+                 forward direction. All subsequent indexes will be 0-based
+                 indices for the activations returned by each of the
+                 transformer layers in the backbone. This function must
+                 return a <float>[batch_size, num_tokens, hidden_dims] tensor
+                 that can be passed as an input to the next layer in the
+                 model.
+             target_ids: An <int>[batch_size, num_tokens] tensor containing
+                 the predicted tokens against which the loss should be
+                 computed. If a span of tokens is provided (sequential truthy
+                 values along axis=1 in the tensor), the loss will be
+                 computed as the aggregate across those tokens.
+
+         Raises:
+             ValueError: If an unsupported scoring_mode is provided, or if
+                 target_ids are not provided when using `scoring_mode="loss"`.
+
+         Returns:
+             The per-token scores as a tensor of size
+             <float>[batch_size, num_tokens, vocab_size] in "logits" mode, or
+             <float>[batch_size, num_tokens] in "loss" mode.
+         """
+         if scoring_mode not in ("logits", "loss"):
+             raise ValueError(
+                 "Unsupported scoring_mode. Must be one of 'logits' or 'loss'."
+             )
+
+         if scoring_mode == "loss" and target_ids is None:
+             raise ValueError(
+                 "Cannot compute loss without targets. Please provide target "
+                 "token ids via the target_ids parameter."
+             )
+
+         batch_shape = ops.shape(token_ids)[:2]
+         assert len(batch_shape) == 2
+
+         if padding_mask is None:
+             padding_mask = ops.ones(shape=batch_shape)
+
+         if layer_intercept_fn is None:
+
+             def default_layer_intercept_fn(x, unused_i):
+                 return x
+
+             layer_intercept_fn = default_layer_intercept_fn
+
+         token_embeddings = self.backbone.token_embedding(token_ids)
+         x = layer_intercept_fn(token_embeddings, -1)
+
+         for i, transformer_layer in enumerate(
+             self.backbone.transformer_layers
+         ):
+             x = transformer_layer(x, decoder_padding_mask=padding_mask)
+             x = layer_intercept_fn(x, i)
+
+         x = self.backbone.layer_norm(x)
+         logits = self.backbone.token_embedding(x, reverse=True)
+
+         if scoring_mode == "logits":
+             return logits
+
+         per_token_loss_fn = keras.losses.SparseCategoricalCrossentropy(
+             from_logits=True, reduction="none"
+         )
+         per_token_loss = per_token_loss_fn(target_ids, logits)
+         return per_token_loss
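A hedged usage sketch for `score()` in `"loss"` mode, mirroring the docstring examples above; the `generate_preprocess()` call assumes the standard `CausalLMPreprocessor` API:

```python
import keras_hub

# Hedged sketch: per-token loss for generated text, assuming the
# standard CausalLMPreprocessor.generate_preprocess() API.
qwen3_lm = keras_hub.models.Qwen3CausalLM.from_preset("qwen3_0.6b_en")
generations = qwen3_lm.generate(["This is a", "Where are you"], max_length=30)
preprocessed = qwen3_lm.preprocessor.generate_preprocess(generations)
per_token_loss = qwen3_lm.score(
    token_ids=preprocessed["token_ids"],
    padding_mask=preprocessed["padding_mask"],
    scoring_mode="loss",
    target_ids=preprocessed["token_ids"],
)
```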
keras_hub/src/models/qwen3/qwen3_presets.py ADDED
@@ -0,0 +1,73 @@
+ """Qwen3 model preset configurations."""
+
+ backbone_presets = {
+     "qwen3_0.6b_en": {
+         "metadata": {
+             "description": (
+                 "28-layer Qwen3 model with 596M parameters, optimized for "
+                 "efficiency and fast inference on resource-constrained devices."
+             ),
+             "params": 596049920,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_0.6b_en/1",
+     },
+     "qwen3_1.7b_en": {
+         "metadata": {
+             "description": (
+                 "28-layer Qwen3 model with 1.72B parameters, offering "
+                 "a good balance between performance and resource usage."
+             ),
+             "params": 1720574976,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_1.7b_en/1",
+     },
+     "qwen3_4b_en": {
+         "metadata": {
+             "description": (
+                 "36-layer Qwen3 model with 4.02B parameters, offering improved "
+                 "reasoning capabilities and better performance than smaller "
+                 "variants."
+             ),
+             "params": 4022468096,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_4b_en/1",
+     },
+     "qwen3_8b_en": {
+         "metadata": {
+             "description": (
+                 "36-layer Qwen3 model with 8.19B parameters, featuring "
+                 "enhanced reasoning, coding, and instruction-following "
+                 "capabilities."
+             ),
+             "params": 8190735360,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_8b_en/1",
+     },
+     "qwen3_14b_en": {
+         "metadata": {
+             "description": (
+                 "40-layer Qwen3 model with 14.77B parameters, featuring "
+                 "advanced reasoning, coding, and multilingual capabilities."
+             ),
+             "params": 14768307200,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_14b_en/1",
+     },
+     "qwen3_32b_en": {
+         "metadata": {
+             "description": (
+                 "64-layer Qwen3 model with 32.76B parameters, featuring "
+                 "state-of-the-art performance across reasoning, coding, and "
+                 "general language tasks."
+             ),
+             "params": 32762123264,
+             "path": "qwen-3",
+         },
+         "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_32b_en/1",
+     },
+ }
keras_hub/src/models/qwen_moe/qwen_moe_attention.py CHANGED
@@ -67,6 +67,7 @@ class QwenMoeAttention(keras.layers.Layer):
      self.rope_scaling_factor = rope_scaling_factor
      self.use_sliding_window_attention = use_sliding_window_attention
      self.sliding_window_size = sliding_window_size
+     self.logit_soft_cap = None

  def build(self, inputs_shape):
      # Einsum variables:
keras_hub/src/utils/keras_utils.py CHANGED
@@ -71,6 +71,23 @@ def fused_attention_op_available():
          )
          return False
      return True
+ elif (
+     hasattr(keras.config, "is_flash_attention_enabled")
+     and keras.config.backend() == "torch"
+ ):
+     try:
+         from torch.backends.cuda import SDPAParams as SDPAParams
+         from torch.backends.cuda import (
+             can_use_flash_attention as can_use_flash_attention,
+         )
+     except ImportError:
+         logging.warning(
+             "Flash attention is not supported in your current PyTorch "
+             "version. Please update it by following the official guide: "
+             "https://pytorch.org/get-started/locally/"
+         )
+         return False
+     return True
  else:
      return False

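With the branch added above, `fused_attention_op_available()` can now also return `True` on the torch backend. A hedged caller-side sketch of how such a helper is typically consumed (the print bodies are placeholders, not keras-hub code):

```python
from keras_hub.src.utils.keras_utils import fused_attention_op_available

# Hedged sketch: gate the fused (flash) attention path on the helper
# patched above, e.g. when deciding whether to request a fused
# dot-product attention kernel.
if fused_attention_op_available():
    print("fused attention path available")
else:
    print("falling back to the unfused attention computation")
```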
keras_hub/src/version.py CHANGED
@@ -1,7 +1,7 @@
  from keras_hub.src.api_export import keras_hub_export

  # Unique source of truth for the version number.
- __version__ = "0.22.0.dev202507100418"
+ __version__ = "0.22.0.dev202507120419"


  @keras_hub_export("keras_hub.version")
keras_hub_nightly-0.22.0.dev202507120419.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: keras-hub-nightly
- Version: 0.22.0.dev202507100418
+ Version: 0.22.0.dev202507120419
  Summary: Pretrained models for Keras.
  Author-email: Keras team <keras-users@googlegroups.com>
  License-Expression: Apache-2.0
keras_hub_nightly-0.22.0.dev202507120419.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
  keras_hub/__init__.py,sha256=bJbUZkqwhZvTb1Tqx1fbkq6mzBYiEyq-Hin3oQIkhdE,558
- keras_hub/layers/__init__.py,sha256=YQ4bW0_mI39Jqj2yoc8xcnynqoaXV2FBjHJviA9Ffas,5190
+ keras_hub/layers/__init__.py,sha256=T1XBtpT0UH9-r0Jc-ljCxtZD_ccapf86ByvUgzdRbvg,5311
  keras_hub/metrics/__init__.py,sha256=KYalsMPBnfwim9BdGHFfJ5WxUKFXOQ1QoKIMT_0lwlM,439
- keras_hub/models/__init__.py,sha256=7MhCw7S-uIPcko-R6g5a-Jy1idKe7BwlI836PfekhHc,27076
+ keras_hub/models/__init__.py,sha256=52UNIL7my_9g6ubPtOMDnGYeuGD4SOldfnGTVRMKTeE,27558
  keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
  keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
- keras_hub/src/version.py,sha256=OlA90iURRl_2HAopvzvHLCIWGESZFyt2k8pv4elg3I4,222
+ keras_hub/src/version.py,sha256=rNXZuSfrhzhmJUw9OqMhah7oEZIE1yPYm31UkmVOLyU,222
  keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -193,7 +193,7 @@ keras_hub/src/models/flux/flux_presets.py,sha256=z7C_FbI1_F5YETXuWpc7Yh_0w-5N0eB
  keras_hub/src/models/flux/flux_text_to_image.py,sha256=Rf5dD2EhG0bE8Gyg9sqaA8YEexS1kdraofIkxiZDjvc,4166
  keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=Fs9jr97QtmRUbRRz1kITpkuhDM2GoV3n0XSFC-qQA14,2252
  keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
- keras_hub/src/models/gemma/gemma_attention.py,sha256=iKSdBRkKEOnryXjz6K-thz70Dgp7LGXo5vYx8D-VMgY,10083
+ keras_hub/src/models/gemma/gemma_attention.py,sha256=wmU5FgQu1Ajg-KHKVXTLHWH7pXqN4_zVJTCp_FXMcAs,10095
  keras_hub/src/models/gemma/gemma_backbone.py,sha256=GzAUSArw_pN9dtWQzTVhWDbW-XyWt4GyMcFLn9hwmh0,13391
  keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
  keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
@@ -227,6 +227,14 @@ keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py,sha256=HriMXNVjGlFTjCIgfLR
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py,sha256=YiVz9qBHjQlwKgtUVrgBTFitHcX5pbmhhfHwaulyRxY,1957
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py,sha256=hmB81V0SuI6bEsxEuFkYgq58wbcrv1YLvmXGin5T3E0,9732
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py,sha256=aKso-8yGrynn3tZ5xm2egcXIBQo3__sWZDBtjmS3ZgU,1991
+ keras_hub/src/models/hgnetv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ keras_hub/src/models/hgnetv2/hgnetv2_backbone.py,sha256=eqVrbU2EyB2ToxK1g2QRW90zd5GyvJ8I7PKVBgqRpfY,7966
+ keras_hub/src/models/hgnetv2/hgnetv2_encoder.py,sha256=VL6XCqyXieUPkqXS7fhsAT-EV6jzyN_i31EjsAizgVU,6464
+ keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py,sha256=62Xual9pRBkU6G_RUdCblx68Z827SCA_5q9utCXxwa0,7897
+ keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_preprocessor.py,sha256=df7OKvJmz2UqOXrqECvI9QdVMVkVMWhK0go9sltajnI,553
+ keras_hub/src/models/hgnetv2/hgnetv2_image_converter.py,sha256=qaGRtDeQwmC0PR69KWC7GzYNdWZ5cHu_exhNzdYyYzM,348
+ keras_hub/src/models/hgnetv2/hgnetv2_layers.py,sha256=OMUKW5VWL0xkEQl7RJYGAbTTB7qeqH3FHtMMuiQ0QmI,36418
+ keras_hub/src/models/hgnetv2/hgnetv2_presets.py,sha256=azRtaBynFqI1ccmP8_LMG16tjNSSeMvgo_ZFneG-bg8,1767
  keras_hub/src/models/llama/__init__.py,sha256=svVZjGi71R3lVbq0AdbqlXj909mr3Rp9EPXdiO0w0G0,251
  keras_hub/src/models/llama/llama_attention.py,sha256=UFHOWr69vTkOxLdgSUckGaSuUUyqlJ_xYoswWHVnTOU,8977
  keras_hub/src/models/llama/llama_backbone.py,sha256=AT8kUPHEn6DT-aGY838_sZkBhByIdh82DWW8y-Sp3mE,13614
@@ -320,14 +328,17 @@ keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSw
  keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
  keras_hub/src/models/qwen/qwen_presets.py,sha256=1FkKV6M3yqJz4EP1xa7bEvfIQ721xXT-_ikjWX0xvww,1992
  keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
- keras_hub/src/models/qwen3/qwen3_attention.py,sha256=mq2Tak0PTItkNi5LnYVcNznS61D2Cdz1-9a-WC7Vw68,13038
+ keras_hub/src/models/qwen3/__init__.py,sha256=fdndQouGmfNhB_Rj76A8my5FvpxOvRJ24DoUha-wlgw,251
+ keras_hub/src/models/qwen3/qwen3_attention.py,sha256=9zjuzGZa6TzaFgO4ShNCEHMPVb3r6mFZW7vzutbwUGg,13050
  keras_hub/src/models/qwen3/qwen3_backbone.py,sha256=Ylpk_rRWWRxy8irlAPjJU-YrxYGpo8c9lSEO1zZl4gU,7456
+ keras_hub/src/models/qwen3/qwen3_causal_lm.py,sha256=cn_4WFVxhlOArtIGAaqkNzIz9Rx8IEWwCVMRFKKk26k,15531
  keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py,sha256=H4g-bgvuhAUnDwjJovydK16Kes38ZFZWPvflrgHqZis,458
  keras_hub/src/models/qwen3/qwen3_decoder.py,sha256=68s9jQj53zFmXE4-SGXKYHu546fXOyi9LUbnKk-HGYY,11595
  keras_hub/src/models/qwen3/qwen3_layernorm.py,sha256=EJxjf7Pr6ufPQnNeuYQxkExzPjPk4PQxqMsoBeSEkDo,1073
+ keras_hub/src/models/qwen3/qwen3_presets.py,sha256=ZcsmPFj3Z4TBoa7ZkJK4JN1D6iHHZ6kCrqXxQE8IH_k,2524
  keras_hub/src/models/qwen3/qwen3_tokenizer.py,sha256=LmPtg0vprMchDvYfTj8m5PraXI2QS3-YgdIIpIm5iAs,1448
  keras_hub/src/models/qwen_moe/__init__.py,sha256=5D8GUmVDsJs0J4sVZHcXOLkZf12U96l-WtwyVee4lu8,267
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=pE79_iHUm2LGkoWL6zMJw_pNfzIvmyq3yJaiq47W2TY,13242
+ keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=o0mcVTDMtElMYq3NSYRCfuYVdF-W8YDSU5ogensrVJg,13277
  keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
  keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
  keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=9P6TT7W_fqf4HsXcmlHF-DW_anR-XoDrRN2ZFGA7Ai4,3168
@@ -480,7 +491,7 @@ keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0Z
  keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
  keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
  keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- keras_hub/src/utils/keras_utils.py,sha256=2qrh4F-rqceVFSx0-cbsFBfWae5hBXFb_sEtPPcImf4,4628
+ keras_hub/src/utils/keras_utils.py,sha256=IWsbg-p-XVLuOkba8PAYNf9zDo4G2RkINLr58p12MhA,5291
  keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
  keras_hub/src/utils/preset_utils.py,sha256=GKYFKK9YcdIrMm0_hC_KTIXgpiMYD6SauMnSRpNsDQo,34975
  keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
@@ -517,7 +528,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=K5FzDAtCuXS9rmZc0Zj7UCw
  keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
  keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
  keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
- keras_hub_nightly-0.22.0.dev202507100418.dist-info/METADATA,sha256=3q4dYFBGZElOUYjNy4xHaOngkHOn7wbO09Gfv7VJvVw,7393
- keras_hub_nightly-0.22.0.dev202507100418.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- keras_hub_nightly-0.22.0.dev202507100418.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
- keras_hub_nightly-0.22.0.dev202507100418.dist-info/RECORD,,
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/METADATA,sha256=FmJeWUJIafpgqRZRIC4nvRMeDHzdClq11rKbpHIffxQ,7393
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/RECORD,,