keras-hub-nightly 0.22.0.dev202508170419__py3-none-any.whl → 0.24.0.dev202511090424__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +15 -0
- keras_hub/models/__init__.py +93 -0
- keras_hub/src/layers/modeling/position_embedding.py +21 -6
- keras_hub/src/layers/modeling/reversible_embedding.py +8 -1
- keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
- keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
- keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
- keras_hub/src/models/backbone.py +28 -16
- keras_hub/src/models/causal_lm.py +37 -0
- keras_hub/src/models/causal_lm_preprocessor.py +14 -0
- keras_hub/src/models/clip/clip_presets.py +8 -8
- keras_hub/src/models/d_fine/__init__.py +5 -0
- keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
- keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
- keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
- keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
- keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
- keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
- keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
- keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
- keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
- keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/d_fine/d_fine_presets.py +155 -0
- keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +7 -2
- keras_hub/src/models/depth_anything/__init__.py +9 -0
- keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
- keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
- keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
- keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
- keras_hub/src/models/depth_anything/depth_anything_presets.py +41 -0
- keras_hub/src/models/depth_anything/interpolate.py +62 -0
- keras_hub/src/models/depth_estimator.py +239 -0
- keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
- keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
- keras_hub/src/models/dinov2/dinov2_layers.py +16 -4
- keras_hub/src/models/dinov3/__init__.py +5 -0
- keras_hub/src/models/dinov3/dinov3_backbone.py +263 -0
- keras_hub/src/models/dinov3/dinov3_image_converter.py +8 -0
- keras_hub/src/models/dinov3/dinov3_layers.py +1013 -0
- keras_hub/src/models/dinov3/dinov3_presets.py +4 -0
- keras_hub/src/models/gemma/gemma_backbone.py +0 -1
- keras_hub/src/models/gemma/gemma_presets.py +30 -0
- keras_hub/src/models/gemma3/gemma3_attention.py +48 -0
- keras_hub/src/models/gemma3/gemma3_backbone.py +4 -1
- keras_hub/src/models/gemma3/gemma3_decoder_block.py +12 -0
- keras_hub/src/models/gemma3/gemma3_presets.py +39 -0
- keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
- keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
- keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
- keras_hub/src/models/image_to_image.py +5 -0
- keras_hub/src/models/inpaint.py +5 -0
- keras_hub/src/models/mobilenetv5/__init__.py +9 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_attention.py +699 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_backbone.py +396 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_blocks.py +890 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py +436 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py +157 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py +10 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_layers.py +462 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py +15 -0
- keras_hub/src/models/mobilenetv5/mobilenetv5_utils.py +146 -0
- keras_hub/src/models/parseq/__init__.py +5 -0
- keras_hub/src/models/parseq/parseq_backbone.py +134 -0
- keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
- keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
- keras_hub/src/models/parseq/parseq_decoder.py +418 -0
- keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
- keras_hub/src/models/parseq/parseq_presets.py +15 -0
- keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
- keras_hub/src/models/qwen3_moe/__init__.py +5 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +30 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
- keras_hub/src/models/sam/sam_prompt_encoder.py +3 -1
- keras_hub/src/models/siglip/siglip_presets.py +15 -0
- keras_hub/src/models/smollm3/smollm3_backbone.py +211 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm.py +310 -0
- keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py +84 -0
- keras_hub/src/models/smollm3/smollm3_layers.py +757 -0
- keras_hub/src/models/smollm3/smollm3_tokenizer.py +60 -0
- keras_hub/src/models/smollm3/smollm3_utils.py +56 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +3 -3
- keras_hub/src/models/t5gemma/__init__.py +5 -0
- keras_hub/src/models/t5gemma/t5gemma_attention.py +370 -0
- keras_hub/src/models/t5gemma/t5gemma_backbone.py +366 -0
- keras_hub/src/models/t5gemma/t5gemma_decoder.py +355 -0
- keras_hub/src/models/t5gemma/t5gemma_encoder.py +214 -0
- keras_hub/src/models/t5gemma/t5gemma_layers.py +118 -0
- keras_hub/src/models/t5gemma/t5gemma_presets.py +374 -0
- keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm.py +442 -0
- keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_preprocessor.py +216 -0
- keras_hub/src/models/t5gemma/t5gemma_tokenizer.py +84 -0
- keras_hub/src/models/text_to_image.py +5 -0
- keras_hub/src/samplers/beam_sampler.py +6 -6
- keras_hub/src/samplers/sampler.py +8 -6
- keras_hub/src/tests/test_case.py +40 -3
- keras_hub/src/tokenizers/tokenizer.py +15 -0
- keras_hub/src/utils/openvino_utils.py +141 -0
- keras_hub/src/utils/preset_utils.py +58 -2
- keras_hub/src/utils/tensor_utils.py +26 -2
- keras_hub/src/utils/timm/convert_mobilenetv5.py +321 -0
- keras_hub/src/utils/timm/preset_loader.py +8 -4
- keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
- keras_hub/src/utils/transformers/convert_dinov3.py +106 -0
- keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
- keras_hub/src/utils/transformers/convert_smollm3.py +139 -0
- keras_hub/src/utils/transformers/convert_t5gemma.py +229 -0
- keras_hub/src/utils/transformers/convert_vit.py +4 -1
- keras_hub/src/utils/transformers/export/gemma.py +49 -4
- keras_hub/src/utils/transformers/export/hf_exporter.py +71 -25
- keras_hub/src/utils/transformers/preset_loader.py +12 -0
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +15 -0
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/RECORD +126 -47
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/top_level.txt +0 -0
keras_hub/layers/__init__.py
CHANGED
@@ -75,6 +75,9 @@ from keras_hub.src.models.clip.clip_image_converter import (
 from keras_hub.src.models.cspnet.cspnet_image_converter import (
     CSPNetImageConverter as CSPNetImageConverter,
 )
+from keras_hub.src.models.d_fine.d_fine_image_converter import (
+    DFineImageConverter as DFineImageConverter,
+)
 from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import (
     DeepLabV3ImageConverter as DeepLabV3ImageConverter,
 )

@@ -84,9 +87,15 @@ from keras_hub.src.models.deit.deit_image_converter import (
 from keras_hub.src.models.densenet.densenet_image_converter import (
     DenseNetImageConverter as DenseNetImageConverter,
 )
+from keras_hub.src.models.depth_anything.depth_anything_image_converter import (
+    DepthAnythingImageConverter as DepthAnythingImageConverter,
+)
 from keras_hub.src.models.dinov2.dinov2_image_converter import (
     DINOV2ImageConverter as DINOV2ImageConverter,
 )
+from keras_hub.src.models.dinov3.dinov3_image_converter import (
+    DINOV3ImageConverter as DINOV3ImageConverter,
+)
 from keras_hub.src.models.efficientnet.efficientnet_image_converter import (
     EfficientNetImageConverter as EfficientNetImageConverter,
 )

@@ -102,12 +111,18 @@ from keras_hub.src.models.mit.mit_image_converter import (
 from keras_hub.src.models.mobilenet.mobilenet_image_converter import (
     MobileNetImageConverter as MobileNetImageConverter,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_converter import (
+    MobileNetV5ImageConverter as MobileNetV5ImageConverter,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_converter import (
     MoonshineAudioConverter as MoonshineAudioConverter,
 )
 from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import (
     PaliGemmaImageConverter as PaliGemmaImageConverter,
 )
+from keras_hub.src.models.parseq.parseq_image_converter import (
+    PARSeqImageConverter as PARSeqImageConverter,
+)
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter as ResNetImageConverter,
 )
keras_hub/models/__init__.py
CHANGED
@@ -108,6 +108,15 @@ from keras_hub.src.models.cspnet.cspnet_image_classifier import (
 from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import (
     CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.d_fine.d_fine_backbone import (
+    DFineBackbone as DFineBackbone,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector import (
+    DFineObjectDetector as DFineObjectDetector,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import (
+    DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
     DebertaV3Backbone as DebertaV3Backbone,
 )

@@ -157,9 +166,27 @@ from keras_hub.src.models.densenet.densenet_image_classifier import (
 from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
     DenseNetImageClassifierPreprocessor as DenseNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.depth_anything.depth_anything_backbone import (
+    DepthAnythingBackbone as DepthAnythingBackbone,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator import (
+    DepthAnythingDepthEstimator as DepthAnythingDepthEstimator,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator_preprocessor import (
+    DepthAnythingDepthEstimatorPreprocessor as DepthAnythingDepthEstimatorPreprocessor,
+)
+from keras_hub.src.models.depth_estimator import (
+    DepthEstimator as DepthEstimator,
+)
+from keras_hub.src.models.depth_estimator_preprocessor import (
+    DepthEstimatorPreprocessor as DepthEstimatorPreprocessor,
+)
 from keras_hub.src.models.dinov2.dinov2_backbone import (
     DINOV2Backbone as DINOV2Backbone,
 )
+from keras_hub.src.models.dinov3.dinov3_backbone import (
+    DINOV3Backbone as DINOV3Backbone,
+)
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     DistilBertBackbone as DistilBertBackbone,
 )

@@ -404,6 +431,15 @@ from keras_hub.src.models.mobilenet.mobilenet_image_classifier import (
 from keras_hub.src.models.mobilenet.mobilenet_image_classifier_preprocessor import (
     MobileNetImageClassifierPreprocessor as MobileNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone as MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier import (
+    MobileNetV5ImageClassifier as MobileNetV5ImageClassifier,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier_preprocessor import (
+    MobileNetV5ImageClassifierPreprocessor as MobileNetV5ImageClassifierPreprocessor,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_to_text import (
     MoonshineAudioToText as MoonshineAudioToText,
 )

@@ -446,6 +482,18 @@ from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import (
 from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
     PaliGemmaTokenizer as PaliGemmaTokenizer,
 )
+from keras_hub.src.models.parseq.parseq_backbone import (
+    PARSeqBackbone as PARSeqBackbone,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm import (
+    PARSeqCausalLM as PARSeqCausalLM,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import (
+    PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor,
+)
+from keras_hub.src.models.parseq.parseq_tokenizer import (
+    PARSeqTokenizer as PARSeqTokenizer,
+)
 from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone
 from keras_hub.src.models.phi3.phi3_causal_lm import (
     Phi3CausalLM as Phi3CausalLM,

@@ -491,6 +539,15 @@ from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
 from keras_hub.src.models.qwen3.qwen3_tokenizer import (
     Qwen3Tokenizer as Qwen3Tokenizer,
 )
+from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import (
+    Qwen3MoeBackbone as Qwen3MoeBackbone,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import (
+    Qwen3MoeCausalLM as Qwen3MoeCausalLM,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor import (
+    Qwen3MoeCausalLMPreprocessor as Qwen3MoeCausalLMPreprocessor,
+)
 from keras_hub.src.models.qwen_moe.qwen_moe_backbone import (
     QwenMoeBackbone as QwenMoeBackbone,
 )

@@ -595,6 +652,30 @@ from keras_hub.src.models.siglip.siglip_tokenizer import (
 from keras_hub.src.models.siglip.siglip_vision_encoder import (
     SigLIPVisionEncoder as SigLIPVisionEncoder,
 )
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLM3Backbone,
+)
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLMBackbone,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLM3CausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLMCausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLM3CausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLMCausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLM3Tokenizer,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLMTokenizer,
+)
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
     StableDiffusion3Backbone as StableDiffusion3Backbone,
 )

@@ -615,6 +696,18 @@ from keras_hub.src.models.t5.t5_preprocessor import (
     T5Preprocessor as T5Preprocessor,
 )
 from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
+from keras_hub.src.models.t5gemma.t5gemma_backbone import (
+    T5GemmaBackbone as T5GemmaBackbone,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import (
+    T5GemmaSeq2SeqLM as T5GemmaSeq2SeqLM,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm_preprocessor import (
+    T5GemmaSeq2SeqLMPreprocessor as T5GemmaSeq2SeqLMPreprocessor,
+)
+from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
+    T5GemmaTokenizer as T5GemmaTokenizer,
+)
 from keras_hub.src.models.task import Task as Task
 from keras_hub.src.models.text_classifier import TextClassifier as Classifier
 from keras_hub.src.models.text_classifier import (
keras_hub/src/layers/modeling/position_embedding.py
CHANGED

@@ -31,6 +31,11 @@ class PositionEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the position embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:

@@ -91,18 +96,28 @@
         )
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         feature_length = shape[-1]
         sequence_length = shape[-2]
         # trim to match the length of the input sequence, which might be less
         # than the sequence_length of the layer.
         position_embeddings = ops.convert_to_tensor(self.position_embeddings)
-        position_embeddings = ops.slice(
-            position_embeddings,
-            (start_index, 0),
-            (sequence_length, feature_length),
-        )
+        if positions is None:
+            position_embeddings = ops.slice(
+                position_embeddings,
+                (start_index, 0),
+                (sequence_length, feature_length),
+            )
+        else:
+            # Take care of unbatched `positions`.
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
+            position_embeddings = ops.take(
+                position_embeddings, positions, axis=0
+            )
+
         return ops.broadcast_to(position_embeddings, shape)

     def compute_output_shape(self, input_shape):
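For context, a minimal usage sketch of the new `positions` argument on `keras_hub.layers.PositionEmbedding`; the layer size, input shapes, and position values below are illustrative only and not taken from the diff:

import numpy as np
import keras_hub

layer = keras_hub.layers.PositionEmbedding(sequence_length=10)
inputs = np.random.rand(2, 5, 8)  # (batch, sequence, features)

# Default behavior: contiguous positions starting at `start_index`.
default_embeddings = layer(inputs)

# Custom, non-contiguous positions; `start_index` is ignored on this path.
custom_positions = np.array([0, 2, 4, 6, 8])
custom_embeddings = layer(inputs, positions=custom_positions)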
keras_hub/src/layers/modeling/reversible_embedding.py
CHANGED

@@ -235,7 +235,8 @@ class ReversibleEmbedding(keras.layers.Embedding):

         return super()._int8_call(inputs)

-    def quantize(self, mode, type_check=True):
+    def quantize(self, mode, type_check=True, config=None):
+        del config
         if type_check and type(self) is not ReversibleEmbedding:
             raise self._not_implemented_error(self.quantize)

@@ -244,6 +245,12 @@
                 inputs, axis=axis, to_numpy=True
             )

+        if mode != "int8":
+            raise NotImplementedError(
+                "Invalid quantization mode. Expected 'int8'. "
+                f"Received: quantization_mode={mode}"
+            )
+
         embeddings_shape = (self.input_dim, self.output_dim)
         if mode == "int8":
             embeddings, embeddings_scale = abs_max_quantize(
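A rough sketch of the int8 path this check now guards; the layer sizes and token ids are made up for illustration, and the reverse-projection call reflects the existing `reverse=True` behavior of the layer rather than anything introduced in this diff:

import numpy as np
import keras_hub

embedding = keras_hub.layers.ReversibleEmbedding(100, 16)
embedding(np.array([[1, 2, 3, 4]]))  # build the layer once
embedding.quantize("int8")  # the only mode this layer accepts

hidden = embedding(np.array([[1, 2, 3, 4]]))  # forward embedding lookup
logits = embedding(hidden, reverse=True)      # reverse projection to vocab logits
# Any other mode, e.g. embedding.quantize("float8"), now raises NotImplementedError.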
keras_hub/src/layers/modeling/rotary_embedding.py
CHANGED

@@ -37,6 +37,11 @@ class RotaryEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the rotary embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the rotary embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Examples:

@@ -76,6 +81,11 @@
         self.built = True

     def call(self, inputs, start_index=0, positions=None):
+        # Take care of unbatched `positions`.
+        if positions is not None:
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
         inputs = ops.moveaxis(
             inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
         )

@@ -103,6 +113,7 @@
         return positions + ops.cast(start_index, dtype="float32")

     def _compute_cos_sin_embedding(self, inputs, start_index=0, positions=None):
+        batch_axis = 0
         feature_axis = len(inputs.shape) - 1
         sequence_axis = 1

@@ -111,21 +122,20 @@

         if positions is None:
             positions = self._compute_positions(inputs, start_index)
+            positions = ops.expand_dims(positions, axis=batch_axis)
         else:
             positions = ops.cast(positions, "float32")
-
         positions = positions / ops.cast(self.scaling_factor, "float32")
-        freq = ops.einsum("i,j->ij", positions, inverse_freq)
+
+        freq = ops.einsum("bi,j->bij", positions, inverse_freq)
+
         embedding = ops.stack((freq, freq), axis=-2)
         embedding = ops.reshape(
             embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
         )

-        # Reshape the embedding to be broadcastable with input shape.
-        if feature_axis < sequence_axis:
-            embedding = ops.transpose(embedding)
         for axis in range(len(inputs.shape)):
-            if axis != sequence_axis and axis != feature_axis:
+            if axis not in (batch_axis, sequence_axis, feature_axis):
                 embedding = ops.expand_dims(embedding, axis)

         cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
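A short sketch of the `positions` override on `keras_hub.layers.RotaryEmbedding`; the tensor shape and position offsets are chosen purely for illustration:

import numpy as np
import keras_hub

rope = keras_hub.layers.RotaryEmbedding()
query = np.random.rand(1, 6, 4, 16)  # (batch, sequence, heads, head_dim)

rotated = rope(query)                  # positions 0..5 from start_index=0
shifted = rope(query, start_index=10)  # positions 10..15

# Explicit per-token positions; `start_index` is ignored here.
custom = rope(query, positions=np.array([3, 4, 5, 6, 7, 8]))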
keras_hub/src/layers/modeling/sine_position_encoding.py
CHANGED

@@ -30,6 +30,11 @@ class SinePositionEncoding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the encoding from. This is useful during cached decoding,
             where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:
     ```python

@@ -58,27 +63,35 @@
         self.max_wavelength = max_wavelength
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         seq_length = shape[-2]
         hidden_size = shape[-1]
-        positions = ops.arange(seq_length)
-        positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        if positions is None:
+            positions = ops.arange(seq_length)
+            positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        # Take care of unbatched `positions`.
+        if len(ops.shape(positions)) == 1:
+            positions = ops.expand_dims(positions, axis=0)
+
         min_freq = ops.cast(1 / self.max_wavelength, dtype=self.compute_dtype)
         timescales = ops.power(
             min_freq,
             ops.cast(2 * (ops.arange(hidden_size) // 2), self.compute_dtype)
             / ops.cast(hidden_size, self.compute_dtype),
         )
-        angles = ops.expand_dims(positions, 1) * ops.expand_dims(timescales, 0)
+        angles = ops.einsum("bi,j->bij", positions, timescales)
+
         # even indices are sine, odd are cosine
         cos_mask = ops.cast(ops.arange(hidden_size) % 2, self.compute_dtype)
         sin_mask = 1 - cos_mask
-        # embedding shape is [seq_length, hidden_size]
-        positional_encodings = (
-            ops.sin(angles) * sin_mask + ops.cos(angles) * cos_mask
-        )

+        # embedding shape is `[bsz (or 1), seq_length, hidden_size]`.
+        positional_encodings = ops.einsum(
+            "bij,j->bij", ops.sin(angles), sin_mask
+        ) + ops.einsum("bij,j->bij", ops.cos(angles), cos_mask)
         return ops.broadcast_to(positional_encodings, shape)

     def get_config(self):
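`SinePositionEncoding` gains the same argument, and `positions` may now be batched. A minimal sketch with illustrative shapes and values:

import numpy as np
import keras_hub

encoder = keras_hub.layers.SinePositionEncoding()
inputs = np.random.rand(2, 5, 8)  # (batch, sequence, features)

# One row of positions per example in the batch.
positions = np.array([[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]])
encodings = encoder(inputs, positions=positions)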
keras_hub/src/layers/modeling/token_and_position_embedding.py
CHANGED

@@ -120,11 +120,12 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         )
         return config

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         embedded_tokens = self.token_embedding(inputs)
         embedded_positions = self.position_embedding(
             embedded_tokens,
             start_index=start_index,
+            positions=positions,
         )
         outputs = embedded_tokens + embedded_positions
         return outputs
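`TokenAndPositionEmbedding` simply forwards the argument to its inner `PositionEmbedding`. A minimal sketch with made-up sizes:

import numpy as np
import keras_hub

embedding = keras_hub.layers.TokenAndPositionEmbedding(
    vocabulary_size=1000,
    sequence_length=8,
    embedding_dim=16,
)
token_ids = np.random.randint(0, 1000, size=(2, 8))

# `positions` is passed through to the underlying PositionEmbedding layer.
outputs = embedding(token_ids, positions=np.arange(8))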
keras_hub/src/models/backbone.py
CHANGED
@@ -91,21 +91,16 @@ class Backbone(keras.Model):
         }

         # Add quantization support by utilizing `DTypePolicyMap`
-        try:
-            if isinstance(
-                self.dtype_policy, keras.dtype_policies.DTypePolicyMap
-            ):
-                config.update({"dtype": self.dtype_policy})
-            else:
-                policy_map = keras.dtype_policies.DTypePolicyMap()
-                for layer in self._flatten_layers():
-                    if layer.quantization_mode is not None:
-                        policy_map[layer.path] = layer.dtype_policy
-                if len(policy_map) > 0:
-                    config.update({"dtype": policy_map})
-        # Before Keras 3.2, there is no `keras.dtype_policies.get`.
-        except AttributeError:
-            pass
+        dtype = self.dtype_policy
+        if not isinstance(dtype, keras.dtype_policies.DTypePolicyMap):
+            policy_map = keras.dtype_policies.DTypePolicyMap()
+            for layer in self._flatten_layers():
+                if layer.quantization_mode is not None:
+                    policy_map[layer.path] = layer.dtype_policy
+            if len(policy_map) > 0:
+                dtype = policy_map
+
+        config.update({"dtype": keras.dtype_policies.serialize(dtype)})
         return config

     @classmethod

@@ -135,7 +130,8 @@
     1. a built-in preset identifier like `'bert_base_en'`
     2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
     3. a Hugging Face handle like `'hf://user/bert_base_en'`
-    4. a path to a local preset directory like `'./bert_base_en'`
+    4. a ModelScope handle like `'modelscope://user/bert_base_en'`
+    5. a path to a local preset directory like `'./bert_base_en'`

     This constructor can be called in one of two ways. Either from the base
     class like `keras_hub.models.Backbone.from_preset()`, or from

@@ -277,3 +273,19 @@
             layer.lora_kernel_a.assign(lora_kernel_a)
             layer.lora_kernel_b.assign(lora_kernel_b)
         store.close()
+
+    def export_to_transformers(self, path):
+        """Export the backbone model to HuggingFace Transformers format.
+
+        This saves the backbone's configuration and weights in a format
+        compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_backbone,
+        )
+
+        export_backbone(self, path)
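A sketch of the new backbone export entry point. The Gemma preset name is only an example of an architecture the exporter handles (the diff also adds a Gemma export module); unsupported architectures raise a ValueError:

import keras_hub

backbone = keras_hub.models.GemmaBackbone.from_preset("gemma_2b_en")
backbone.export_to_transformers("./gemma_backbone_hf")
# The output directory can then be loaded from the transformers side,
# e.g. with AutoModel.from_pretrained("./gemma_backbone_hf").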
keras_hub/src/models/causal_lm.py
CHANGED

@@ -132,6 +132,17 @@ class CausalLM(Task):
             return self.generate_function

         self.generate_function = self.generate_step
+        if keras.config.backend() == "openvino":
+            from keras_hub.src.utils.openvino_utils import ov_infer
+
+            def wrapped_generate_function(inputs, stop_token_ids=None):
+                # Convert to numpy for OpenVINO backend
+                inputs = tree.map_structure(ops.array, inputs)
+                return ov_infer(
+                    self, inputs, stop_token_ids, self.generate_step
+                )
+
+            self.generate_function = wrapped_generate_function
         if keras.config.backend() == "torch":
             import torch

@@ -392,3 +403,29 @@
         outputs = [postprocess(x) for x in outputs]

         return self._normalize_generate_outputs(outputs, input_is_scalar)
+
+    def export_to_transformers(self, path):
+        """Export the full CausalLM model to HuggingFace Transformers format.
+
+        This exports the trainable model, tokenizer, and configurations in a
+        format compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        If the preprocessor is attached (default), both the trainable model and
+        tokenizer are exported. To export only the trainable model, set
+        `self.preprocessor = None` before calling this method, then export the
+        preprocessor separately via `preprocessor.export_to_transformers(path)`.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_to_safetensors,
+        )
+
+        export_to_safetensors(self, path)
+
+    def _post_quantize(self, mode, **kwargs):
+        super()._post_quantize(mode, **kwargs)
+        # Reset the compiled generate function.
+        self.generate_function = None
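And the task-level counterpart, following the docstring above; the preset name is again only illustrative:

import keras_hub

causal_lm = keras_hub.models.GemmaCausalLM.from_preset("gemma_2b_en")

# Default: exports the trainable model together with the tokenizer.
causal_lm.export_to_transformers("./gemma_hf")

# Model-only export, with the tokenizer written separately.
preprocessor = causal_lm.preprocessor
causal_lm.preprocessor = None
causal_lm.export_to_transformers("./gemma_hf_model_only")
preprocessor.export_to_transformers("./gemma_hf_tokenizer")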
keras_hub/src/models/causal_lm_preprocessor.py
CHANGED

@@ -180,3 +180,17 @@ class CausalLMPreprocessor(Preprocessor):
         self._sequence_length = value
         if self.packer is not None:
             self.packer.sequence_length = value
+
+    def export_to_transformers(self, path):
+        """Export the preprocessor to HuggingFace Transformers format.
+
+        Args:
+            path: str. Path to save the exported preprocessor/tokenizer.
+        """
+        if self.tokenizer is None:
+            raise ValueError("Preprocessor must have a tokenizer for export.")
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_tokenizer,
+        )
+
+        export_tokenizer(self.tokenizer, path)
keras_hub/src/models/clip/clip_presets.py
CHANGED

@@ -11,7 +11,7 @@ backbone_presets = {
             "params": 149620934,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/3",
     },
     "clip_vit_base_patch32": {
         "metadata": {

@@ -22,7 +22,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/3",
     },
     "clip_vit_large_patch14": {
         "metadata": {

@@ -33,7 +33,7 @@ backbone_presets = {
             "params": 427616770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/3",
     },
     "clip_vit_large_patch14_336": {
         "metadata": {

@@ -44,7 +44,7 @@ backbone_presets = {
             "params": 427944770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/3",
     },
     "clip_vit_b_32_laion2b_s34b_b79k": {
         "metadata": {

@@ -55,7 +55,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/3",
     },
     "clip_vit_h_14_laion2b_s32b_b79k": {
         "metadata": {

@@ -66,7 +66,7 @@ backbone_presets = {
             "params": 986109698,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/3",
     },
     "clip_vit_g_14_laion2b_s12b_b42k": {
         "metadata": {

@@ -77,7 +77,7 @@ backbone_presets = {
             "params": 1366678530,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/3",
     },
     "clip_vit_bigg_14_laion2b_39b_b160k": {
         "metadata": {

@@ -88,6 +88,6 @@ backbone_presets = {
             "params": 2539567362,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/3",
     },
 }