keras-hub-nightly 0.22.0.dev202507110420__py3-none-any.whl → 0.22.0.dev202507120419__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +3 -0
- keras_hub/models/__init__.py +9 -0
- keras_hub/src/models/gemma/gemma_attention.py +1 -1
- keras_hub/src/models/hgnetv2/__init__.py +0 -0
- keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +193 -0
- keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +148 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py +216 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_preprocessor.py +14 -0
- keras_hub/src/models/hgnetv2/hgnetv2_image_converter.py +8 -0
- keras_hub/src/models/hgnetv2/hgnetv2_layers.py +918 -0
- keras_hub/src/models/hgnetv2/hgnetv2_presets.py +58 -0
- keras_hub/src/models/qwen3/__init__.py +5 -0
- keras_hub/src/models/qwen3/qwen3_presets.py +73 -0
- keras_hub/src/models/qwen_moe/qwen_moe_attention.py +1 -0
- keras_hub/src/utils/keras_utils.py +17 -0
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.22.0.dev202507110420.dist-info → keras_hub_nightly-0.22.0.dev202507120419.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.22.0.dev202507110420.dist-info → keras_hub_nightly-0.22.0.dev202507120419.dist-info}/RECORD +20 -10
- {keras_hub_nightly-0.22.0.dev202507110420.dist-info → keras_hub_nightly-0.22.0.dev202507120419.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.22.0.dev202507110420.dist-info → keras_hub_nightly-0.22.0.dev202507120419.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# Metadata for loading pretrained model weights.
|
2
|
+
backbone_presets = {
|
3
|
+
"hgnetv2_b4_ssld_stage2_ft_in1k": {
|
4
|
+
"metadata": {
|
5
|
+
"description": (
|
6
|
+
"HGNetV2 B4 model with 2-stage SSLD training, fine-tuned on "
|
7
|
+
"ImageNet-1K."
|
8
|
+
),
|
9
|
+
"params": 13599072,
|
10
|
+
"path": "hgnetv2",
|
11
|
+
},
|
12
|
+
"kaggle_handle": "",
|
13
|
+
},
|
14
|
+
"hgnetv2_b5_ssld_stage1_in22k_in1k": {
|
15
|
+
"metadata": {
|
16
|
+
"description": (
|
17
|
+
"HGNetV2 B5 model with 1-stage SSLD training, pre-trained on "
|
18
|
+
"ImageNet-22K and fine-tuned on ImageNet-1K."
|
19
|
+
),
|
20
|
+
"params": 33419680,
|
21
|
+
"path": "hgnetv2",
|
22
|
+
},
|
23
|
+
"kaggle_handle": "",
|
24
|
+
},
|
25
|
+
"hgnetv2_b5_ssld_stage2_ft_in1k": {
|
26
|
+
"metadata": {
|
27
|
+
"description": (
|
28
|
+
"HGNetV2 B5 model with 2-stage SSLD training, fine-tuned on "
|
29
|
+
"ImageNet-1K."
|
30
|
+
),
|
31
|
+
"params": 33419680,
|
32
|
+
"path": "hgnetv2",
|
33
|
+
},
|
34
|
+
"kaggle_handle": "",
|
35
|
+
},
|
36
|
+
"hgnetv2_b6_ssld_stage1_in22k_in1k": {
|
37
|
+
"metadata": {
|
38
|
+
"description": (
|
39
|
+
"HGNetV2 B6 model with 1-stage SSLD training, pre-trained on "
|
40
|
+
"ImageNet-22K and fine-tuned on ImageNet-1K."
|
41
|
+
),
|
42
|
+
"params": 69179888,
|
43
|
+
"path": "hgnetv2",
|
44
|
+
},
|
45
|
+
"kaggle_handle": "",
|
46
|
+
},
|
47
|
+
"hgnetv2_b6_ssld_stage2_ft_in1k": {
|
48
|
+
"metadata": {
|
49
|
+
"description": (
|
50
|
+
"HGNetV2 B6 model with 2-stage SSLD training, fine-tuned on "
|
51
|
+
"ImageNet-1K."
|
52
|
+
),
|
53
|
+
"params": 69179888,
|
54
|
+
"path": "hgnetv2",
|
55
|
+
},
|
56
|
+
"kaggle_handle": "",
|
57
|
+
},
|
58
|
+
}
|
@@ -0,0 +1,73 @@
|
|
1
|
+
"""Qwen3 model preset configurations."""
|
2
|
+
|
3
|
+
backbone_presets = {
|
4
|
+
"qwen3_0.6b_en": {
|
5
|
+
"metadata": {
|
6
|
+
"description": (
|
7
|
+
"28-layer Qwen3 model with 596M parameters, optimized for "
|
8
|
+
"efficiency and fast inference on resource-constrained devices."
|
9
|
+
),
|
10
|
+
"params": 596049920,
|
11
|
+
"path": "qwen-3",
|
12
|
+
},
|
13
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_0.6b_en/1",
|
14
|
+
},
|
15
|
+
"qwen3_1.7b_en": {
|
16
|
+
"metadata": {
|
17
|
+
"description": (
|
18
|
+
"28-layer Qwen3 model with 1.72B parameters, offering "
|
19
|
+
"a good balance between performance and resource usage."
|
20
|
+
),
|
21
|
+
"params": 1720574976,
|
22
|
+
"path": "qwen-3",
|
23
|
+
},
|
24
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_1.7b_en/1",
|
25
|
+
},
|
26
|
+
"qwen3_4b_en": {
|
27
|
+
"metadata": {
|
28
|
+
"description": (
|
29
|
+
"36-layer Qwen3 model with 4.02B parameters, offering improved "
|
30
|
+
"reasoning capabilities and better performance than smaller "
|
31
|
+
"variants."
|
32
|
+
),
|
33
|
+
"params": 4022468096,
|
34
|
+
"path": "qwen-3",
|
35
|
+
},
|
36
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_4b_en/1",
|
37
|
+
},
|
38
|
+
"qwen3_8b_en": {
|
39
|
+
"metadata": {
|
40
|
+
"description": (
|
41
|
+
"36-layer Qwen3 model with 8.19B parameters, featuring "
|
42
|
+
"enhanced reasoning, coding, and instruction-following "
|
43
|
+
"capabilities."
|
44
|
+
),
|
45
|
+
"params": 8190735360,
|
46
|
+
"path": "qwen-3",
|
47
|
+
},
|
48
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_8b_en/1",
|
49
|
+
},
|
50
|
+
"qwen3_14b_en": {
|
51
|
+
"metadata": {
|
52
|
+
"description": (
|
53
|
+
"40-layer Qwen3 model with 14.77B parameters, featuring "
|
54
|
+
"advanced reasoning, coding, and multilingual capabilities."
|
55
|
+
),
|
56
|
+
"params": 14768307200,
|
57
|
+
"path": "qwen-3",
|
58
|
+
},
|
59
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_14b_en/1",
|
60
|
+
},
|
61
|
+
"qwen3_32b_en": {
|
62
|
+
"metadata": {
|
63
|
+
"description": (
|
64
|
+
"64-layer Qwen3 model with 32.76B parameters, featuring "
|
65
|
+
"state-of-the-art performance across reasoning, coding, and "
|
66
|
+
"general language tasks."
|
67
|
+
),
|
68
|
+
"params": 32762123264,
|
69
|
+
"path": "qwen-3",
|
70
|
+
},
|
71
|
+
"kaggle_handle": "kaggle://keras/qwen-3/keras/qwen3_32b_en/1",
|
72
|
+
},
|
73
|
+
}
|
@@ -67,6 +67,7 @@ class QwenMoeAttention(keras.layers.Layer):
|
|
67
67
|
self.rope_scaling_factor = rope_scaling_factor
|
68
68
|
self.use_sliding_window_attention = use_sliding_window_attention
|
69
69
|
self.sliding_window_size = sliding_window_size
|
70
|
+
self.logit_soft_cap = None
|
70
71
|
|
71
72
|
def build(self, inputs_shape):
|
72
73
|
# Einsum variables:
|
@@ -71,6 +71,23 @@ def fused_attention_op_available():
|
|
71
71
|
)
|
72
72
|
return False
|
73
73
|
return True
|
74
|
+
elif (
|
75
|
+
hasattr(keras.config, "is_flash_attention_enabled")
|
76
|
+
and keras.config.backend() == "torch"
|
77
|
+
):
|
78
|
+
try:
|
79
|
+
from torch.backends.cuda import SDPAParams as SDPAParams
|
80
|
+
from torch.backends.cuda import (
|
81
|
+
can_use_flash_attention as can_use_flash_attention,
|
82
|
+
)
|
83
|
+
except ImportError:
|
84
|
+
logging.warning(
|
85
|
+
"Flash attention is not supported in your current PyTorch "
|
86
|
+
"version. Please update it by following the official guide: "
|
87
|
+
"https://pytorch.org/get-started/locally/"
|
88
|
+
)
|
89
|
+
return False
|
90
|
+
return True
|
74
91
|
else:
|
75
92
|
return False
|
76
93
|
|
keras_hub/src/version.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
keras_hub/__init__.py,sha256=bJbUZkqwhZvTb1Tqx1fbkq6mzBYiEyq-Hin3oQIkhdE,558
|
2
|
-
keras_hub/layers/__init__.py,sha256=
|
2
|
+
keras_hub/layers/__init__.py,sha256=T1XBtpT0UH9-r0Jc-ljCxtZD_ccapf86ByvUgzdRbvg,5311
|
3
3
|
keras_hub/metrics/__init__.py,sha256=KYalsMPBnfwim9BdGHFfJ5WxUKFXOQ1QoKIMT_0lwlM,439
|
4
|
-
keras_hub/models/__init__.py,sha256=
|
4
|
+
keras_hub/models/__init__.py,sha256=52UNIL7my_9g6ubPtOMDnGYeuGD4SOldfnGTVRMKTeE,27558
|
5
5
|
keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
|
6
6
|
keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
|
8
|
-
keras_hub/src/version.py,sha256=
|
8
|
+
keras_hub/src/version.py,sha256=rNXZuSfrhzhmJUw9OqMhah7oEZIE1yPYm31UkmVOLyU,222
|
9
9
|
keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
|
@@ -193,7 +193,7 @@ keras_hub/src/models/flux/flux_presets.py,sha256=z7C_FbI1_F5YETXuWpc7Yh_0w-5N0eB
|
|
193
193
|
keras_hub/src/models/flux/flux_text_to_image.py,sha256=Rf5dD2EhG0bE8Gyg9sqaA8YEexS1kdraofIkxiZDjvc,4166
|
194
194
|
keras_hub/src/models/flux/flux_text_to_image_preprocessor.py,sha256=Fs9jr97QtmRUbRRz1kITpkuhDM2GoV3n0XSFC-qQA14,2252
|
195
195
|
keras_hub/src/models/gemma/__init__.py,sha256=rVzOJMJ39bgVlT8UdC0t8PlN2c237GKTBmfHIsbPuOQ,251
|
196
|
-
keras_hub/src/models/gemma/gemma_attention.py,sha256=
|
196
|
+
keras_hub/src/models/gemma/gemma_attention.py,sha256=wmU5FgQu1Ajg-KHKVXTLHWH7pXqN4_zVJTCp_FXMcAs,10095
|
197
197
|
keras_hub/src/models/gemma/gemma_backbone.py,sha256=GzAUSArw_pN9dtWQzTVhWDbW-XyWt4GyMcFLn9hwmh0,13391
|
198
198
|
keras_hub/src/models/gemma/gemma_causal_lm.py,sha256=3OXaIXlrKqMIuUnBk-bUz-0SYFL-XkkQTWm8qRY2YII,16770
|
199
199
|
keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py,sha256=bpKkEurWIfa6Kp9s4pz84-sBDSA6ZFNHP8nXG1fFQrg,2912
|
@@ -227,6 +227,14 @@ keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py,sha256=HriMXNVjGlFTjCIgfLR
|
|
227
227
|
keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py,sha256=YiVz9qBHjQlwKgtUVrgBTFitHcX5pbmhhfHwaulyRxY,1957
|
228
228
|
keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py,sha256=hmB81V0SuI6bEsxEuFkYgq58wbcrv1YLvmXGin5T3E0,9732
|
229
229
|
keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py,sha256=aKso-8yGrynn3tZ5xm2egcXIBQo3__sWZDBtjmS3ZgU,1991
|
230
|
+
keras_hub/src/models/hgnetv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
231
|
+
keras_hub/src/models/hgnetv2/hgnetv2_backbone.py,sha256=eqVrbU2EyB2ToxK1g2QRW90zd5GyvJ8I7PKVBgqRpfY,7966
|
232
|
+
keras_hub/src/models/hgnetv2/hgnetv2_encoder.py,sha256=VL6XCqyXieUPkqXS7fhsAT-EV6jzyN_i31EjsAizgVU,6464
|
233
|
+
keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py,sha256=62Xual9pRBkU6G_RUdCblx68Z827SCA_5q9utCXxwa0,7897
|
234
|
+
keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_preprocessor.py,sha256=df7OKvJmz2UqOXrqECvI9QdVMVkVMWhK0go9sltajnI,553
|
235
|
+
keras_hub/src/models/hgnetv2/hgnetv2_image_converter.py,sha256=qaGRtDeQwmC0PR69KWC7GzYNdWZ5cHu_exhNzdYyYzM,348
|
236
|
+
keras_hub/src/models/hgnetv2/hgnetv2_layers.py,sha256=OMUKW5VWL0xkEQl7RJYGAbTTB7qeqH3FHtMMuiQ0QmI,36418
|
237
|
+
keras_hub/src/models/hgnetv2/hgnetv2_presets.py,sha256=azRtaBynFqI1ccmP8_LMG16tjNSSeMvgo_ZFneG-bg8,1767
|
230
238
|
keras_hub/src/models/llama/__init__.py,sha256=svVZjGi71R3lVbq0AdbqlXj909mr3Rp9EPXdiO0w0G0,251
|
231
239
|
keras_hub/src/models/llama/llama_attention.py,sha256=UFHOWr69vTkOxLdgSUckGaSuUUyqlJ_xYoswWHVnTOU,8977
|
232
240
|
keras_hub/src/models/llama/llama_backbone.py,sha256=AT8kUPHEn6DT-aGY838_sZkBhByIdh82DWW8y-Sp3mE,13614
|
@@ -320,15 +328,17 @@ keras_hub/src/models/qwen/qwen_decoder.py,sha256=utmAvZlU7_nP-6pjGPDinK4JaMzsQSw
|
|
320
328
|
keras_hub/src/models/qwen/qwen_layernorm.py,sha256=DS35r3qd6g5ocL7Nhf_vNzLLMo1aI9VCSmL64dgNOYI,924
|
321
329
|
keras_hub/src/models/qwen/qwen_presets.py,sha256=1FkKV6M3yqJz4EP1xa7bEvfIQ721xXT-_ikjWX0xvww,1992
|
322
330
|
keras_hub/src/models/qwen/qwen_tokenizer.py,sha256=LCv3IyiDDHqVnM9N3lf5-BE3iwicIh0nKS1hjoPw9lE,1532
|
331
|
+
keras_hub/src/models/qwen3/__init__.py,sha256=fdndQouGmfNhB_Rj76A8my5FvpxOvRJ24DoUha-wlgw,251
|
323
332
|
keras_hub/src/models/qwen3/qwen3_attention.py,sha256=9zjuzGZa6TzaFgO4ShNCEHMPVb3r6mFZW7vzutbwUGg,13050
|
324
333
|
keras_hub/src/models/qwen3/qwen3_backbone.py,sha256=Ylpk_rRWWRxy8irlAPjJU-YrxYGpo8c9lSEO1zZl4gU,7456
|
325
334
|
keras_hub/src/models/qwen3/qwen3_causal_lm.py,sha256=cn_4WFVxhlOArtIGAaqkNzIz9Rx8IEWwCVMRFKKk26k,15531
|
326
335
|
keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py,sha256=H4g-bgvuhAUnDwjJovydK16Kes38ZFZWPvflrgHqZis,458
|
327
336
|
keras_hub/src/models/qwen3/qwen3_decoder.py,sha256=68s9jQj53zFmXE4-SGXKYHu546fXOyi9LUbnKk-HGYY,11595
|
328
337
|
keras_hub/src/models/qwen3/qwen3_layernorm.py,sha256=EJxjf7Pr6ufPQnNeuYQxkExzPjPk4PQxqMsoBeSEkDo,1073
|
338
|
+
keras_hub/src/models/qwen3/qwen3_presets.py,sha256=ZcsmPFj3Z4TBoa7ZkJK4JN1D6iHHZ6kCrqXxQE8IH_k,2524
|
329
339
|
keras_hub/src/models/qwen3/qwen3_tokenizer.py,sha256=LmPtg0vprMchDvYfTj8m5PraXI2QS3-YgdIIpIm5iAs,1448
|
330
340
|
keras_hub/src/models/qwen_moe/__init__.py,sha256=5D8GUmVDsJs0J4sVZHcXOLkZf12U96l-WtwyVee4lu8,267
|
331
|
-
keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=
|
341
|
+
keras_hub/src/models/qwen_moe/qwen_moe_attention.py,sha256=o0mcVTDMtElMYq3NSYRCfuYVdF-W8YDSU5ogensrVJg,13277
|
332
342
|
keras_hub/src/models/qwen_moe/qwen_moe_backbone.py,sha256=nrfELvIvRLmrgKrUNXci2CrecmeI6bWzJj7HH-RcWJA,15341
|
333
343
|
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm.py,sha256=MeP60v7GcN_SmH5_ULRpqgmFVgaYAosSecZiSQVlJvU,13256
|
334
344
|
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py,sha256=9P6TT7W_fqf4HsXcmlHF-DW_anR-XoDrRN2ZFGA7Ai4,3168
|
@@ -481,7 +491,7 @@ keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py,sha256=hRv_XxoPIPDpHfO0Z
|
|
481
491
|
keras_hub/src/tokenizers/word_piece_tokenizer.py,sha256=vP6AZgbzsRiuPCt3W_n94nsF7XiERnagWcH_rqJHtVU,19943
|
482
492
|
keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py,sha256=cylrs02ZrYQ1TuZr9oyS3NrVbDwGctA3VXbIh1pFJMQ,6743
|
483
493
|
keras_hub/src/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
484
|
-
keras_hub/src/utils/keras_utils.py,sha256=
|
494
|
+
keras_hub/src/utils/keras_utils.py,sha256=IWsbg-p-XVLuOkba8PAYNf9zDo4G2RkINLr58p12MhA,5291
|
485
495
|
keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
|
486
496
|
keras_hub/src/utils/preset_utils.py,sha256=GKYFKK9YcdIrMm0_hC_KTIXgpiMYD6SauMnSRpNsDQo,34975
|
487
497
|
keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
|
@@ -518,7 +528,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=K5FzDAtCuXS9rmZc0Zj7UCw
|
|
518
528
|
keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
|
519
529
|
keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
|
520
530
|
keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
|
521
|
-
keras_hub_nightly-0.22.0.
|
522
|
-
keras_hub_nightly-0.22.0.
|
523
|
-
keras_hub_nightly-0.22.0.
|
524
|
-
keras_hub_nightly-0.22.0.
|
531
|
+
keras_hub_nightly-0.22.0.dev202507120419.dist-info/METADATA,sha256=FmJeWUJIafpgqRZRIC4nvRMeDHzdClq11rKbpHIffxQ,7393
|
532
|
+
keras_hub_nightly-0.22.0.dev202507120419.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
533
|
+
keras_hub_nightly-0.22.0.dev202507120419.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
|
534
|
+
keras_hub_nightly-0.22.0.dev202507120419.dist-info/RECORD,,
|
File without changes
|