keras-hub-nightly 0.22.0.dev202507110420__py3-none-any.whl → 0.22.0.dev202507120419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ # Metadata for loading pretrained model weights.
2
+ backbone_presets = {
3
+ "hgnetv2_b4_ssld_stage2_ft_in1k": {
4
+ "metadata": {
5
+ "description": (
6
+ "HGNetV2 B4 model with 2-stage SSLD training, fine-tuned on "
7
+ "ImageNet-1K."
8
+ ),
9
+ "params": 13599072,
10
+ "path": "hgnetv2",
11
+ },
12
+ "kaggle_handle": "",
13
+ },
14
+ "hgnetv2_b5_ssld_stage1_in22k_in1k": {
15
+ "metadata": {
16
+ "description": (
17
+ "HGNetV2 B5 model with 1-stage SSLD training, pre-trained on "
18
+ "ImageNet-22K and fine-tuned on ImageNet-1K."
19
+ ),
20
+ "params": 33419680,
21
+ "path": "hgnetv2",
22
+ },
23
+ "kaggle_handle": "",
24
+ },
25
+ "hgnetv2_b5_ssld_stage2_ft_in1k": {
26
+ "metadata": {
27
+ "description": (
28
+ "HGNetV2 B5 model with 2-stage SSLD training, fine-tuned on "
29
+ "ImageNet-1K."
30
+ ),
31
+ "params": 33419680,
32
+ "path": "hgnetv2",
33
+ },
34
+ "kaggle_handle": "",
35
+ },
36
+ "hgnetv2_b6_ssld_stage1_in22k_in1k": {
37
+ "metadata": {
38
+ "description": (
39
+ "HGNetV2 B6 model with 1-stage SSLD training, pre-trained on "
40
+ "ImageNet-22K and fine-tuned on ImageNet-1K."
41
+ ),
42
+ "params": 69179888,
43
+ "path": "hgnetv2",
44
+ },
45
+ "kaggle_handle": "",
46
+ },
47
+ "hgnetv2_b6_ssld_stage2_ft_in1k": {
48
+ "metadata": {
49
+ "description": (
50
+ "HGNetV2 B6 model with 2-stage SSLD training, fine-tuned on "
51
+ "ImageNet-1K."
52
+ ),
53
+ "params": 69179888,
54
+ "path": "hgnetv2",
55
+ },
56
+ "kaggle_handle": "",
57
+ },
58
+ }
@@ -0,0 +1,5 @@
1
+ from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone
2
+ from keras_hub.src.models.qwen3.qwen3_presets import backbone_presets
3
+ from keras_hub.src.utils.preset_utils import register_presets
4
+
5
+ register_presets(backbone_presets, Qwen3Backbone)
@@ -0,0 +1,73 @@
1
+ """Qwen3 model preset configurations."""
2
+
3
+ backbone_presets = {
4
+ "qwen3_0.6b_en": {
5
+ "metadata": {
6
+ "description": (
7
+ "28-layer Qwen3 model with 596M parameters, optimized for "
8
+ "efficiency and fast inference on resource-constrained devices."
9
+ ),
10
+ "params": 596049920,
11
+ "path": "qwen-3",
12
+ },
13
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_0.6b_en/1",
14
+ },
15
+ "qwen3_1.7b_en": {
16
+ "metadata": {
17
+ "description": (
18
+ "28-layer Qwen3 model with 1.72B parameters, offering "
19
+ "a good balance between performance and resource usage."
20
+ ),
21
+ "params": 1720574976,
22
+ "path": "qwen-3",
23
+ },
24
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_1.7b_en/1",
25
+ },
26
+ "qwen3_4b_en": {
27
+ "metadata": {
28
+ "description": (
29
+ "36-layer Qwen3 model with 4.02B parameters, offering improved "
30
+ "reasoning capabilities and better performance than smaller "
31
+ "variants."
32
+ ),
33
+ "params": 4022468096,
34
+ "path": "qwen-3",
35
+ },
36
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_4b_en/1",
37
+ },
38
+ "qwen3_8b_en": {
39
+ "metadata": {
40
+ "description": (
41
+ "36-layer Qwen3 model with 8.19B parameters, featuring "
42
+ "enhanced reasoning, coding, and instruction-following "
43
+ "capabilities."
44
+ ),
45
+ "params": 8190735360,
46
+ "path": "qwen-3",
47
+ },
48
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_8b_en/1",
49
+ },
50
+ "qwen3_14b_en": {
51
+ "metadata": {
52
+ "description": (
53
+ "40-layer Qwen3 model with 14.77B parameters, featuring "
54
+ "advanced reasoning, coding, and multilingual capabilities."
55
+ ),
56
+ "params": 14768307200,
57
+ "path": "qwen-3",
58
+ },
59
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_14b_en/1",
60
+ },
61
+ "qwen3_32b_en": {
62
+ "metadata": {
63
+ "description": (
64
+ "64-layer Qwen3 model with 32.76B parameters, featuring "
65
+ "state-of-the-art performance across reasoning, coding, and "
66
+ "general language tasks."
67
+ ),
68
+ "params": 32762123264,
69
+ "path": "qwen-3",
70
+ },
71
+ "kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_32b_en/1",
72
+ },
73
+ }
@@ -67,6 +67,7 @@ class QwenMoeAttention(keras.layers.Layer):
67
67
  self.rope_scaling_factor = rope_scaling_factor
68
68
  self.use_sliding_window_attention = use_sliding_window_attention
69
69
  self.sliding_window_size = sliding_window_size
70
+ self.logit_soft_cap = None
70
71
 
71
72
  def build(self, inputs_shape):
72
73
  # Einsum variables:
@@ -71,6 +71,23 @@ def fused_attention_op_available():
71
71
  )
72
72
  return False
73
73
  return True
74
+ elif (
75
+ hasattr(keras.config, "is_flash_attention_enabled")
76
+ and keras.config.backend() == "torch"
77
+ ):
78
+ try:
79
+ from torch.backends.cuda import SDPAParams as SDPAParams
80
+ from torch.backends.cuda import (
81
+ can_use_flash_attention as can_use_flash_attention,
82
+ )
83
+ except ImportError:
84
+ logging.warning(
85
+ "Flash attention is not supported in your current PyTorch "
86
+ "version. Please update it by following the official guide: "
87
+ "https://pytorch.org/get-started/locally/"
88
+ )
89
+ return False
90
+ return True
74
91
  else:
75
92
  return False
76
93
 
keras_hub/src/version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from keras_hub.src.api_export import keras_hub_export
2
2
 
3
3
  # Unique source of truth for the version number.
4
- __version__ = "0.22.0.dev202507110420"
4
+ __version__ = "0.22.0.dev202507120419"
5
5
 
6
6
 
7
7
  @keras_hub_export("keras_hub.version")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: keras-hub-nightly
3
- Version: 0.22.0.dev202507110420
3
+ Version: 0.22.0.dev202507120419
4
4
  Summary: Pretrained models for Keras.
5
5
  Author-email: Keras team <keras-users@googlegroups.com>
6
6
  License-Expression: Apache-2.0
@@ -1,11 +1,11 @@
1
1
  keras_hub/__init__.py,sha256=bJbUZkqwhZvTb1Tqx1fbkq6mzBYiEyq-Hin3oQIkhdE,558
2
- keras_hub/layers/__init__.py,sha256=YQ4bW0_mI39Jqj2yoc8xcnynqoaXV2FBjHJviA9Ffas,5190
2
+ keras_hub/layers/__init__.py,sha256=T1XBtpT0UH9-r0Jc-ljCxtZD_ccapf86ByvUgzdRbvg,5311
3
3
  keras_hub/metrics/__init__.py,sha256=KYalsMPBnfwim9BdGHFfJ5WxUKFXOQ1QoKIMT_0lwlM,439
4
- keras_hub/models/__init__.py,sha256=MLS1BAaqj3feo4_xk2Nh_BgsP-cU0xH5LGCWn8DnOac,27171
4
+ keras_hub/models/__init__.py,sha256=52UNIL7my_9g6ubPtOMDnGYeuGD4SOldfnGTVRMKTeE,27558
5
5
  keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
6
6
  keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
8
- keras_hub/src/version.py,sha256=dSWginL7BmQjFR-tqM-dtE5eAT7UEak-TCtuQhEAsto,222
8
+ keras_hub/src/version.py,sha256=rNXZuSfrhzhmJUw9OqMhah7oEZIE1yPYm31UkmVOLyU,222
9
9
  keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -193,7 +193,7 @@ keras_hub/src/models/flux/flux_presets.py,sha256=z7C_FbI1_F5YETXuWpc7Yh_0w-5N0eB
193
193
  keras_hub/src/models/flux/flux_text_to_image.py,sha256=Rf5dD2EhG0bE8Gyg9sqaA8YEexS1kdraofIkxiZDjvc,4166
194
194
  keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=Fs9jr97QtmRUbRRz1kITpkuhDM2GoV3n0XSFC-qQA14,2252
195
195
  keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
196
- keras_hub/src/models/gemma/gemma_attention.py,sha256=iKSdBRkKEOnryXjz6K-thz70Dgp7LGXo5vYx8D-VMgY,10083
196
+ keras_hub/src/models/gemma/gemma_attention.py,sha256=wmU5FgQu1Ajg-KHKVXTLHWH7pXqN4_zVJTCp_FXMcAs,10095
197
197
  keras_hub/src/models/gemma/gemma_backbone.py,sha256=GzAUSArw_pN9dtWQzTVhWDbW-XyWt4GyMcFLn9hwmh0,13391
198
198
  keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
199
199
  keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
@@ -227,6 +227,14 @@ keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py,sha256=HriMXNVjGlFTjCIgfLR
227
227
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py,sha256=YiVz9qBHjQlwKgtUVrgBTFitHcX5pbmhhfHwaulyRxY,1957
228
228
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py,sha256=hmB81V0SuI6bEsxEuFkYgq58wbcrv1YLvmXGin5T3E0,9732
229
229
  keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py,sha256=aKso-8yGrynn3tZ5xm2egcXIBQo3__sWZDBtjmS3ZgU,1991
230
+ keras_hub/src/models/hgnetv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
+ keras_hub/src/models/hgnetv2/hgnetv2_backbone.py,sha256=eqVrbU2EyB2ToxK1g2QRW90zd5GyvJ8I7PKVBgqRpfY,7966
232
+ keras_hub/src/models/hgnetv2/hgnetv2_encoder.py,sha256=VL6XCqyXieUPkqXS7fhsAT-EV6jzyN_i31EjsAizgVU,6464
233
+ keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py,sha256=62Xual9pRBkU6G_RUdCblx68Z827SCA_5q9utCXxwa0,7897
234
+ keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_preprocessor.py,sha256=df7OKvJmz2UqOXrqECvI9QdVMVkVMWhK0go9sltajnI,553
235
+ keras_hub/src/models/hgnetv2/hgnetv2_image_converter.py,sha256=qaGRtDeQwmC0PR69KWC7GzYNdWZ5cHu_exhNzdYyYzM,348
236
+ keras_hub/src/models/hgnetv2/hgnetv2_layers.py,sha256=OMUKW5VWL0xkEQl7RJYGAbTTB7qeqH3FHtMMuiQ0QmI,36418
237
+ keras_hub/src/models/hgnetv2/hgnetv2_presets.py,sha256=azRtaBynFqI1ccmP8_LMG16tjNSSeMvgo_ZFneG-bg8,1767
230
238
  keras_hub/src/models/llama/__init__.py,sha256=svVZjGi71R3lVbq0AdbqlXj909mr3Rp9EPXdiO0w0G0,251
231
239
  keras_hub/src/models/llama/llama_attention.py,sha256=UFHOWr69vTkOxLdgSUckGaSuUUyqlJ_xYoswWHVnTOU,8977
232
240
  keras_hub/src/models/llama/llama_backbone.py,sha256=AT8kUPHEn6DT-aGY838_sZkBhByIdh82DWW8y-Sp3mE,13614
@@ -320,15 +328,17 @@ keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSw
320
328
  keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
321
329
  keras_hub/src/models/qwen/qwen_presets.py,sha256=1FkKV6M3yqJz4EP1xa7bEvfIQ721xXT-_ikjWX0xvww,1992
322
330
  keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
331
+ keras_hub/src/models/qwen3/__init__.py,sha256=fdndQouGmfNhB_Rj76A8my5FvpxOvRJ24DoUha-wlgw,251
323
332
  keras_hub/src/models/qwen3/qwen3_attention.py,sha256=9zjuzGZa6TzaFgO4ShNCEHMPVb3r6mFZW7vzutbwUGg,13050
324
333
  keras_hub/src/models/qwen3/qwen3_backbone.py,sha256=Ylpk_rRWWRxy8irlAPjJU-YrxYGpo8c9lSEO1zZl4gU,7456
325
334
  keras_hub/src/models/qwen3/qwen3_causal_lm.py,sha256=cn_4WFVxhlOArtIGAaqkNzIz9Rx8IEWwCVMRFKKk26k,15531
326
335
  keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py,sha256=H4g-bgvuhAUnDwjJovydK16Kes38ZFZWPvflrgHqZis,458
327
336
  keras_hub/src/models/qwen3/qwen3_decoder.py,sha256=68s9jQj53zFmXE4-SGXKYHu546fXOyi9LUbnKk-HGYY,11595
328
337
  keras_hub/src/models/qwen3/qwen3_layernorm.py,sha256=EJxjf7Pr6ufPQnNeuYQxkExzPjPk4PQxqMsoBeSEkDo,1073
338
+ keras_hub/src/models/qwen3/qwen3_presets.py,sha256=ZcsmPFj3Z4TBoa7ZkJK4JN1D6iHHZ6kCrqXxQE8IH_k,2524
329
339
  keras_hub/src/models/qwen3/qwen3_tokenizer.py,sha256=LmPtg0vprMchDvYfTj8m5PraXI2QS3-YgdIIpIm5iAs,1448
330
340
  keras_hub/src/models/qwen_moe/__init__.py,sha256=5D8GUmVDsJs0J4sVZHcXOLkZf12U96l-WtwyVee4lu8,267
331
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=pE79_iHUm2LGkoWL6zMJw_pNfzIvmyq3yJaiq47W2TY,13242
341
+ keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=o0mcVTDMtElMYq3NSYRCfuYVdF-W8YDSU5ogensrVJg,13277
332
342
  keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
333
343
  keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
334
344
  keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=9P6TT7W_fqf4HsXcmlHF-DW_anR-XoDrRN2ZFGA7Ai4,3168
@@ -481,7 +491,7 @@ keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0Z
481
491
  keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
482
492
  keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
483
493
  keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
484
- keras_hub/src/utils/keras_utils.py,sha256=2qrh4F-rqceVFSx0-cbsFBfWae5hBXFb_sEtPPcImf4,4628
494
+ keras_hub/src/utils/keras_utils.py,sha256=IWsbg-p-XVLuOkba8PAYNf9zDo4G2RkINLr58p12MhA,5291
485
495
  keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
486
496
  keras_hub/src/utils/preset_utils.py,sha256=GKYFKK9YcdIrMm0_hC_KTIXgpiMYD6SauMnSRpNsDQo,34975
487
497
  keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
@@ -518,7 +528,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=K5FzDAtCuXS9rmZc0Zj7UCw
518
528
  keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
519
529
  keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
520
530
  keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
521
- keras_hub_nightly-0.22.0.dev202507110420.dist-info/METADATA,sha256=bYZi3YvbFAqx9LxRjaliOlGdM8ljETHH2EJpizEjW1I,7393
522
- keras_hub_nightly-0.22.0.dev202507110420.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
523
- keras_hub_nightly-0.22.0.dev202507110420.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
524
- keras_hub_nightly-0.22.0.dev202507110420.dist-info/RECORD,,
531
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/METADATA,sha256=FmJeWUJIafpgqRZRIC4nvRMeDHzdClq11rKbpHIffxQ,7393
532
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
533
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
534
+ keras_hub_nightly-0.22.0.dev202507120419.dist-info/RECORD,,