keras-hub-nightly 0.22.0.dev202508170419__py3-none-any.whl → 0.24.0.dev202511090424__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

This version of keras-hub-nightly has been flagged as potentially problematic.

Files changed (126)
  1. keras_hub/layers/__init__.py +15 -0
  2. keras_hub/models/__init__.py +93 -0
  3. keras_hub/src/layers/modeling/position_embedding.py +21 -6
  4. keras_hub/src/layers/modeling/reversible_embedding.py +8 -1
  5. keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
  6. keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
  7. keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
  8. keras_hub/src/models/backbone.py +28 -16
  9. keras_hub/src/models/causal_lm.py +37 -0
  10. keras_hub/src/models/causal_lm_preprocessor.py +14 -0
  11. keras_hub/src/models/clip/clip_presets.py +8 -8
  12. keras_hub/src/models/d_fine/__init__.py +5 -0
  13. keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
  14. keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
  15. keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
  16. keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
  17. keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
  18. keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
  19. keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
  20. keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
  21. keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
  22. keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
  23. keras_hub/src/models/d_fine/d_fine_presets.py +155 -0
  24. keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
  25. keras_hub/src/models/deberta_v3/disentangled_self_attention.py +7 -2
  26. keras_hub/src/models/depth_anything/__init__.py +9 -0
  27. keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
  28. keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
  29. keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
  30. keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
  31. keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
  32. keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
  33. keras_hub/src/models/depth_anything/depth_anything_presets.py +41 -0
  34. keras_hub/src/models/depth_anything/interpolate.py +62 -0
  35. keras_hub/src/models/depth_estimator.py +239 -0
  36. keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
  37. keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
  38. keras_hub/src/models/dinov2/dinov2_layers.py +16 -4
  39. keras_hub/src/models/dinov3/__init__.py +5 -0
  40. keras_hub/src/models/dinov3/dinov3_backbone.py +263 -0
  41. keras_hub/src/models/dinov3/dinov3_image_converter.py +8 -0
  42. keras_hub/src/models/dinov3/dinov3_layers.py +1013 -0
  43. keras_hub/src/models/dinov3/dinov3_presets.py +4 -0
  44. keras_hub/src/models/gemma/gemma_backbone.py +0 -1
  45. keras_hub/src/models/gemma/gemma_presets.py +30 -0
  46. keras_hub/src/models/gemma3/gemma3_attention.py +48 -0
  47. keras_hub/src/models/gemma3/gemma3_backbone.py +4 -1
  48. keras_hub/src/models/gemma3/gemma3_decoder_block.py +12 -0
  49. keras_hub/src/models/gemma3/gemma3_presets.py +39 -0
  50. keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
  51. keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
  52. keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
  53. keras_hub/src/models/image_to_image.py +5 -0
  54. keras_hub/src/models/inpaint.py +5 -0
  55. keras_hub/src/models/mobilenetv5/__init__.py +9 -0
  56. keras_hub/src/models/mobilenetv5/mobilenetv5_attention.py +699 -0
  57. keras_hub/src/models/mobilenetv5/mobilenetv5_backbone.py +396 -0
  58. keras_hub/src/models/mobilenetv5/mobilenetv5_blocks.py +890 -0
  59. keras_hub/src/models/mobilenetv5/mobilenetv5_builder.py +436 -0
  60. keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier.py +157 -0
  61. keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_preprocessor.py +16 -0
  62. keras_hub/src/models/mobilenetv5/mobilenetv5_image_converter.py +10 -0
  63. keras_hub/src/models/mobilenetv5/mobilenetv5_layers.py +462 -0
  64. keras_hub/src/models/mobilenetv5/mobilenetv5_presets.py +15 -0
  65. keras_hub/src/models/mobilenetv5/mobilenetv5_utils.py +146 -0
  66. keras_hub/src/models/parseq/__init__.py +5 -0
  67. keras_hub/src/models/parseq/parseq_backbone.py +134 -0
  68. keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
  69. keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
  70. keras_hub/src/models/parseq/parseq_decoder.py +418 -0
  71. keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
  72. keras_hub/src/models/parseq/parseq_presets.py +15 -0
  73. keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
  74. keras_hub/src/models/qwen3_moe/__init__.py +5 -0
  75. keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
  76. keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
  77. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
  78. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
  79. keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
  80. keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
  81. keras_hub/src/models/qwen3_moe/qwen3_moe_presets.py +30 -0
  82. keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
  83. keras_hub/src/models/sam/sam_prompt_encoder.py +3 -1
  84. keras_hub/src/models/siglip/siglip_presets.py +15 -0
  85. keras_hub/src/models/smollm3/smollm3_backbone.py +211 -0
  86. keras_hub/src/models/smollm3/smollm3_causal_lm.py +310 -0
  87. keras_hub/src/models/smollm3/smollm3_causal_lm_preprocessor.py +84 -0
  88. keras_hub/src/models/smollm3/smollm3_layers.py +757 -0
  89. keras_hub/src/models/smollm3/smollm3_tokenizer.py +60 -0
  90. keras_hub/src/models/smollm3/smollm3_utils.py +56 -0
  91. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +3 -3
  92. keras_hub/src/models/t5gemma/__init__.py +5 -0
  93. keras_hub/src/models/t5gemma/t5gemma_attention.py +370 -0
  94. keras_hub/src/models/t5gemma/t5gemma_backbone.py +366 -0
  95. keras_hub/src/models/t5gemma/t5gemma_decoder.py +355 -0
  96. keras_hub/src/models/t5gemma/t5gemma_encoder.py +214 -0
  97. keras_hub/src/models/t5gemma/t5gemma_layers.py +118 -0
  98. keras_hub/src/models/t5gemma/t5gemma_presets.py +374 -0
  99. keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm.py +442 -0
  100. keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_preprocessor.py +216 -0
  101. keras_hub/src/models/t5gemma/t5gemma_tokenizer.py +84 -0
  102. keras_hub/src/models/text_to_image.py +5 -0
  103. keras_hub/src/samplers/beam_sampler.py +6 -6
  104. keras_hub/src/samplers/sampler.py +8 -6
  105. keras_hub/src/tests/test_case.py +40 -3
  106. keras_hub/src/tokenizers/tokenizer.py +15 -0
  107. keras_hub/src/utils/openvino_utils.py +141 -0
  108. keras_hub/src/utils/preset_utils.py +58 -2
  109. keras_hub/src/utils/tensor_utils.py +26 -2
  110. keras_hub/src/utils/timm/convert_mobilenetv5.py +321 -0
  111. keras_hub/src/utils/timm/preset_loader.py +8 -4
  112. keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
  113. keras_hub/src/utils/transformers/convert_dinov3.py +106 -0
  114. keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
  115. keras_hub/src/utils/transformers/convert_smollm3.py +139 -0
  116. keras_hub/src/utils/transformers/convert_t5gemma.py +229 -0
  117. keras_hub/src/utils/transformers/convert_vit.py +4 -1
  118. keras_hub/src/utils/transformers/export/gemma.py +49 -4
  119. keras_hub/src/utils/transformers/export/hf_exporter.py +71 -25
  120. keras_hub/src/utils/transformers/preset_loader.py +12 -0
  121. keras_hub/src/version.py +1 -1
  122. keras_hub/tokenizers/__init__.py +15 -0
  123. {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/METADATA +1 -1
  124. {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/RECORD +126 -47
  125. {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/WHEEL +0 -0
  126. {keras_hub_nightly-0.22.0.dev202508170419.dist-info → keras_hub_nightly-0.24.0.dev202511090424.dist-info}/top_level.txt +0 -0

keras_hub/layers/__init__.py:

@@ -75,6 +75,9 @@ from keras_hub.src.models.clip.clip_image_converter import (
 from keras_hub.src.models.cspnet.cspnet_image_converter import (
     CSPNetImageConverter as CSPNetImageConverter,
 )
+from keras_hub.src.models.d_fine.d_fine_image_converter import (
+    DFineImageConverter as DFineImageConverter,
+)
 from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import (
     DeepLabV3ImageConverter as DeepLabV3ImageConverter,
 )
@@ -84,9 +87,15 @@ from keras_hub.src.models.deit.deit_image_converter import (
 from keras_hub.src.models.densenet.densenet_image_converter import (
     DenseNetImageConverter as DenseNetImageConverter,
 )
+from keras_hub.src.models.depth_anything.depth_anything_image_converter import (
+    DepthAnythingImageConverter as DepthAnythingImageConverter,
+)
 from keras_hub.src.models.dinov2.dinov2_image_converter import (
     DINOV2ImageConverter as DINOV2ImageConverter,
 )
+from keras_hub.src.models.dinov3.dinov3_image_converter import (
+    DINOV3ImageConverter as DINOV3ImageConverter,
+)
 from keras_hub.src.models.efficientnet.efficientnet_image_converter import (
     EfficientNetImageConverter as EfficientNetImageConverter,
 )
@@ -102,12 +111,18 @@ from keras_hub.src.models.mit.mit_image_converter import (
 from keras_hub.src.models.mobilenet.mobilenet_image_converter import (
     MobileNetImageConverter as MobileNetImageConverter,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_converter import (
+    MobileNetV5ImageConverter as MobileNetV5ImageConverter,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_converter import (
     MoonshineAudioConverter as MoonshineAudioConverter,
 )
 from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import (
     PaliGemmaImageConverter as PaliGemmaImageConverter,
 )
+from keras_hub.src.models.parseq.parseq_image_converter import (
+    PARSeqImageConverter as PARSeqImageConverter,
+)
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter as ResNetImageConverter,
 )

keras_hub/models/__init__.py:

@@ -108,6 +108,15 @@ from keras_hub.src.models.cspnet.cspnet_image_classifier import (
 from keras_hub.src.models.cspnet.cspnet_image_classifier_preprocessor import (
     CSPNetImageClassifierPreprocessor as CSPNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.d_fine.d_fine_backbone import (
+    DFineBackbone as DFineBackbone,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector import (
+    DFineObjectDetector as DFineObjectDetector,
+)
+from keras_hub.src.models.d_fine.d_fine_object_detector_preprocessor import (
+    DFineObjectDetectorPreprocessor as DFineObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
     DebertaV3Backbone as DebertaV3Backbone,
 )
@@ -157,9 +166,27 @@ from keras_hub.src.models.densenet.densenet_image_classifier import (
 from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
     DenseNetImageClassifierPreprocessor as DenseNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.depth_anything.depth_anything_backbone import (
+    DepthAnythingBackbone as DepthAnythingBackbone,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator import (
+    DepthAnythingDepthEstimator as DepthAnythingDepthEstimator,
+)
+from keras_hub.src.models.depth_anything.depth_anything_depth_estimator_preprocessor import (
+    DepthAnythingDepthEstimatorPreprocessor as DepthAnythingDepthEstimatorPreprocessor,
+)
+from keras_hub.src.models.depth_estimator import (
+    DepthEstimator as DepthEstimator,
+)
+from keras_hub.src.models.depth_estimator_preprocessor import (
+    DepthEstimatorPreprocessor as DepthEstimatorPreprocessor,
+)
 from keras_hub.src.models.dinov2.dinov2_backbone import (
     DINOV2Backbone as DINOV2Backbone,
 )
+from keras_hub.src.models.dinov3.dinov3_backbone import (
+    DINOV3Backbone as DINOV3Backbone,
+)
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     DistilBertBackbone as DistilBertBackbone,
 )
@@ -404,6 +431,15 @@ from keras_hub.src.models.mobilenet.mobilenet_image_classifier import (
 from keras_hub.src.models.mobilenet.mobilenet_image_classifier_preprocessor import (
     MobileNetImageClassifierPreprocessor as MobileNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.mobilenetv5.mobilenetv5_backbone import (
+    MobileNetV5Backbone as MobileNetV5Backbone,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier import (
+    MobileNetV5ImageClassifier as MobileNetV5ImageClassifier,
+)
+from keras_hub.src.models.mobilenetv5.mobilenetv5_image_classifier_preprocessor import (
+    MobileNetV5ImageClassifierPreprocessor as MobileNetV5ImageClassifierPreprocessor,
+)
 from keras_hub.src.models.moonshine.moonshine_audio_to_text import (
     MoonshineAudioToText as MoonshineAudioToText,
 )
@@ -446,6 +482,18 @@ from keras_hub.src.models.pali_gemma.pali_gemma_causal_lm_preprocessor import (
 from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
     PaliGemmaTokenizer as PaliGemmaTokenizer,
 )
+from keras_hub.src.models.parseq.parseq_backbone import (
+    PARSeqBackbone as PARSeqBackbone,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm import (
+    PARSeqCausalLM as PARSeqCausalLM,
+)
+from keras_hub.src.models.parseq.parseq_causal_lm_preprocessor import (
+    PARSeqCausalLMPreprocessor as PARSeqCausalLMPreprocessor,
+)
+from keras_hub.src.models.parseq.parseq_tokenizer import (
+    PARSeqTokenizer as PARSeqTokenizer,
+)
 from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone as Phi3Backbone
 from keras_hub.src.models.phi3.phi3_causal_lm import (
     Phi3CausalLM as Phi3CausalLM,
@@ -491,6 +539,15 @@ from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
 from keras_hub.src.models.qwen3.qwen3_tokenizer import (
     Qwen3Tokenizer as Qwen3Tokenizer,
 )
+from keras_hub.src.models.qwen3_moe.qwen3_moe_backbone import (
+    Qwen3MoeBackbone as Qwen3MoeBackbone,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm import (
+    Qwen3MoeCausalLM as Qwen3MoeCausalLM,
+)
+from keras_hub.src.models.qwen3_moe.qwen3_moe_causal_lm_preprocessor import (
+    Qwen3MoeCausalLMPreprocessor as Qwen3MoeCausalLMPreprocessor,
+)
 from keras_hub.src.models.qwen_moe.qwen_moe_backbone import (
     QwenMoeBackbone as QwenMoeBackbone,
 )
@@ -595,6 +652,30 @@ from keras_hub.src.models.siglip.siglip_tokenizer import (
 from keras_hub.src.models.siglip.siglip_vision_encoder import (
     SigLIPVisionEncoder as SigLIPVisionEncoder,
 )
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLM3Backbone,
+)
+from keras_hub.src.models.smollm3.smollm3_backbone import (
+    SmolLM3Backbone as SmolLMBackbone,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLM3CausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm import (
+    SmolLM3CausalLM as SmolLMCausalLM,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLM3CausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor import (
+    SmolLM3CausalLMPreprocessor as SmolLMCausalLMPreprocessor,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLM3Tokenizer,
+)
+from keras_hub.src.models.smollm3.smollm3_tokenizer import (
+    SmolLM3Tokenizer as SmolLMTokenizer,
+)
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
     StableDiffusion3Backbone as StableDiffusion3Backbone,
 )
@@ -615,6 +696,18 @@ from keras_hub.src.models.t5.t5_preprocessor import (
     T5Preprocessor as T5Preprocessor,
 )
 from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer as T5Tokenizer
+from keras_hub.src.models.t5gemma.t5gemma_backbone import (
+    T5GemmaBackbone as T5GemmaBackbone,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm import (
+    T5GemmaSeq2SeqLM as T5GemmaSeq2SeqLM,
+)
+from keras_hub.src.models.t5gemma.t5gemma_seq_2_seq_lm_preprocessor import (
+    T5GemmaSeq2SeqLMPreprocessor as T5GemmaSeq2SeqLMPreprocessor,
+)
+from keras_hub.src.models.t5gemma.t5gemma_tokenizer import (
+    T5GemmaTokenizer as T5GemmaTokenizer,
+)
 from keras_hub.src.models.task import Task as Task
 from keras_hub.src.models.text_classifier import TextClassifier as Classifier
 from keras_hub.src.models.text_classifier import (
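
These re-exports are what make the new model families reachable from the public namespace. A minimal sketch, using names taken from the additions above:

```python
# After this release, the new task and backbone classes are importable
# directly from keras_hub.models.
from keras_hub.models import DFineObjectDetector
from keras_hub.models import DepthAnythingDepthEstimator
from keras_hub.models import Qwen3MoeCausalLM
from keras_hub.models import SmolLM3CausalLM
from keras_hub.models import T5GemmaSeq2SeqLM
```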

keras_hub/src/layers/modeling/position_embedding.py:

@@ -31,6 +31,11 @@ class PositionEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the position embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:

@@ -91,18 +96,28 @@ class PositionEmbedding(keras.layers.Layer):
         )
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         feature_length = shape[-1]
         sequence_length = shape[-2]
         # trim to match the length of the input sequence, which might be less
         # than the sequence_length of the layer.
         position_embeddings = ops.convert_to_tensor(self.position_embeddings)
-        position_embeddings = ops.slice(
-            position_embeddings,
-            (start_index, 0),
-            (sequence_length, feature_length),
-        )
+        if positions is None:
+            position_embeddings = ops.slice(
+                position_embeddings,
+                (start_index, 0),
+                (sequence_length, feature_length),
+            )
+        else:
+            # Take care of unbatched `positions`.
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
+            position_embeddings = ops.take(
+                position_embeddings, positions, axis=0
+            )
+
         return ops.broadcast_to(position_embeddings, shape)

     def compute_output_shape(self, input_shape):
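
A minimal usage sketch of the new `positions` argument (shapes and index values chosen for illustration): when `positions` is given, the layer gathers rows of its learned embedding at those indices instead of slicing from `start_index`.

```python
import numpy as np
import keras_hub

layer = keras_hub.layers.PositionEmbedding(sequence_length=64)
x = np.random.rand(2, 6, 16)  # (batch, sequence, features)
# Non-contiguous positions; `start_index` is ignored when `positions` is set.
positions = np.array([0, 1, 2, 10, 11, 12])
outputs = layer(x, positions=positions)  # shape (2, 6, 16)
```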

keras_hub/src/layers/modeling/reversible_embedding.py:

@@ -235,7 +235,8 @@ class ReversibleEmbedding(keras.layers.Embedding):

         return super()._int8_call(inputs)

-    def quantize(self, mode, type_check=True):
+    def quantize(self, mode, type_check=True, config=None):
+        del config
         if type_check and type(self) is not ReversibleEmbedding:
             raise self._not_implemented_error(self.quantize)

@@ -244,6 +245,12 @@ class ReversibleEmbedding(keras.layers.Embedding):
                 inputs, axis=axis, to_numpy=True
             )

+        if mode != "int8":
+            raise NotImplementedError(
+                "Invalid quantization mode. Expected 'int8'. "
+                f"Received: quantization_mode={mode}"
+            )
+
         embeddings_shape = (self.input_dim, self.output_dim)
         if mode == "int8":
             embeddings, embeddings_scale = abs_max_quantize(
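
A minimal sketch of the narrowed contract: `config` is accepted (presumably for signature compatibility with newer Keras `quantize()` calls) but discarded, and any mode other than `"int8"` is rejected. The layer below is constructed ad hoc for illustration.

```python
import keras_hub

embedding = keras_hub.layers.ReversibleEmbedding(input_dim=1000, output_dim=64)
embedding.build((None, 16))
embedding.quantize("int8")     # quantizes the embedding weights in place
# embedding.quantize("float8") # would now raise NotImplementedError
```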

keras_hub/src/layers/modeling/rotary_embedding.py:

@@ -37,6 +37,11 @@ class RotaryEmbedding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the rotary embedding from. This is useful during cached
             decoding, where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the rotary embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Examples:

@@ -76,6 +81,11 @@ class RotaryEmbedding(keras.layers.Layer):
         self.built = True

     def call(self, inputs, start_index=0, positions=None):
+        # Take care of unbatched `positions`.
+        if positions is not None:
+            if len(ops.shape(positions)) == 1:
+                positions = ops.expand_dims(positions, axis=0)
+
         inputs = ops.moveaxis(
             inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
         )
@@ -103,6 +113,7 @@ class RotaryEmbedding(keras.layers.Layer):
         return positions + ops.cast(start_index, dtype="float32")

     def _compute_cos_sin_embedding(self, inputs, start_index=0, positions=None):
+        batch_axis = 0
         feature_axis = len(inputs.shape) - 1
         sequence_axis = 1

@@ -111,21 +122,20 @@ class RotaryEmbedding(keras.layers.Layer):

         if positions is None:
             positions = self._compute_positions(inputs, start_index)
+            positions = ops.expand_dims(positions, axis=batch_axis)
         else:
             positions = ops.cast(positions, "float32")
-
         positions = positions / ops.cast(self.scaling_factor, "float32")
-        freq = ops.einsum("i,j->ij", positions, inverse_freq)
+
+        freq = ops.einsum("bi,j->bij", positions, inverse_freq)
+
         embedding = ops.stack((freq, freq), axis=-2)
         embedding = ops.reshape(
             embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
         )

-        # Reshape the embedding to be broadcastable with input shape.
-        if feature_axis < sequence_axis:
-            embedding = ops.transpose(embedding)
         for axis in range(len(inputs.shape)):
-            if axis != sequence_axis and axis != feature_axis:
+            if axis not in (batch_axis, sequence_axis, feature_axis):
                 embedding = ops.expand_dims(embedding, axis)

         cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
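
A minimal sketch of the batched-positions path that the `"bi,j->bij"` einsum enables (shapes are illustrative): each batch element can now carry its own position ids, e.g. for left-padded sequences.

```python
import numpy as np
import keras_hub

layer = keras_hub.layers.RotaryEmbedding()
x = np.random.rand(2, 10, 8, 64).astype("float32")  # (batch, seq, heads, head_dim)
positions = np.stack([np.arange(10), np.arange(5, 15)]).astype("float32")  # (batch, seq)
rotated = layer(x, positions=positions)  # same shape as x
```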

keras_hub/src/layers/modeling/sine_position_encoding.py:

@@ -30,6 +30,11 @@ class SinePositionEncoding(keras.layers.Layer):
         start_index: An integer or integer tensor. The starting position to
             compute the encoding from. This is useful during cached decoding,
             where each position is predicted separately in a loop.
+        positions: Tensor of shape `(sequence_length,)` or
+            `(batch_size, sequence_length)`. Custom positions for the input
+            sequence. If specified, this tensor will be used to
+            compute the position embedding, and the `start_index` argument will
+            be ignored. This is useful for cases with non-standard positions.

     Example:
     ```python
@@ -58,27 +63,35 @@ class SinePositionEncoding(keras.layers.Layer):
         self.max_wavelength = max_wavelength
         self.built = True

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         shape = ops.shape(inputs)
         seq_length = shape[-2]
         hidden_size = shape[-1]
-        positions = ops.arange(seq_length)
-        positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        if positions is None:
+            positions = ops.arange(seq_length)
+            positions = ops.cast(positions + start_index, self.compute_dtype)
+
+        # Take care of unbatched `positions`.
+        if len(ops.shape(positions)) == 1:
+            positions = ops.expand_dims(positions, axis=0)
+
         min_freq = ops.cast(1 / self.max_wavelength, dtype=self.compute_dtype)
         timescales = ops.power(
             min_freq,
             ops.cast(2 * (ops.arange(hidden_size) // 2), self.compute_dtype)
             / ops.cast(hidden_size, self.compute_dtype),
         )
-        angles = ops.expand_dims(positions, 1) * ops.expand_dims(timescales, 0)
+        angles = ops.einsum("bi,j->bij", positions, timescales)
+
         # even indices are sine, odd are cosine
         cos_mask = ops.cast(ops.arange(hidden_size) % 2, self.compute_dtype)
         sin_mask = 1 - cos_mask
-        # embedding shape is [seq_length, hidden_size]
-        positional_encodings = (
-            ops.sin(angles) * sin_mask + ops.cos(angles) * cos_mask
-        )

+        # embedding shape is `[bsz (or 1), seq_length, hidden_size]`.
+        positional_encodings = ops.einsum(
+            "bij,j->bij", ops.sin(angles), sin_mask
+        ) + ops.einsum("bij,j->bij", ops.cos(angles), cos_mask)
         return ops.broadcast_to(positional_encodings, shape)

     def get_config(self):
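
A minimal sketch with explicit positions. Note that user-supplied `positions` are not cast by the layer, so a float dtype matching the compute dtype is assumed here.

```python
import numpy as np
import keras_hub

layer = keras_hub.layers.SinePositionEncoding()
x = np.random.rand(1, 4, 32).astype("float32")
positions = np.array([[0.0, 2.0, 4.0, 8.0]], dtype="float32")  # (batch, seq)
encodings = layer(x, positions=positions)  # shape (1, 4, 32)
```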

keras_hub/src/layers/modeling/token_and_position_embedding.py:

@@ -120,11 +120,12 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
         )
         return config

-    def call(self, inputs, start_index=0):
+    def call(self, inputs, start_index=0, positions=None):
         embedded_tokens = self.token_embedding(inputs)
         embedded_positions = self.position_embedding(
             embedded_tokens,
             start_index=start_index,
+            positions=positions,
         )
         outputs = embedded_tokens + embedded_positions
         return outputs
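
The wrapper simply forwards `positions` to its inner `PositionEmbedding`. A minimal sketch (constructor arguments chosen for illustration):

```python
import numpy as np
import keras_hub

layer = keras_hub.layers.TokenAndPositionEmbedding(
    vocabulary_size=1000,
    sequence_length=64,
    embedding_dim=16,
)
token_ids = np.array([[5, 9, 2, 7]])
positions = np.array([[3, 4, 5, 6]])  # overrides start_index
outputs = layer(token_ids, positions=positions)  # shape (1, 4, 16)
```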

keras_hub/src/models/backbone.py:

@@ -91,21 +91,16 @@ class Backbone(keras.Model):
         }

         # Add quantization support by utilizing `DTypePolicyMap`
-        try:
-            if isinstance(
-                self.dtype_policy, keras.dtype_policies.DTypePolicyMap
-            ):
-                config.update({"dtype": self.dtype_policy})
-            else:
-                policy_map = keras.dtype_policies.DTypePolicyMap()
-                for layer in self._flatten_layers():
-                    if layer.quantization_mode is not None:
-                        policy_map[layer.path] = layer.dtype_policy
-                if len(policy_map) > 0:
-                    config.update({"dtype": policy_map})
-        # Before Keras 3.2, there is no `keras.dtype_policies.get`.
-        except AttributeError:
-            pass
+        dtype = self.dtype_policy
+        if not isinstance(dtype, keras.dtype_policies.DTypePolicyMap):
+            policy_map = keras.dtype_policies.DTypePolicyMap()
+            for layer in self._flatten_layers():
+                if layer.quantization_mode is not None:
+                    policy_map[layer.path] = layer.dtype_policy
+            if len(policy_map) > 0:
+                dtype = policy_map
+
+        config.update({"dtype": keras.dtype_policies.serialize(dtype)})
         return config

     @classmethod
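
A minimal sketch of the effect, using a small ad-hoc backbone (constructor arguments are illustrative): after quantization, the serialized `"dtype"` entry carries a `DTypePolicyMap` recording which sub-layers hold quantized policies.

```python
import keras_hub

backbone = keras_hub.models.BertBackbone(
    vocabulary_size=100,
    num_layers=2,
    num_heads=2,
    hidden_dim=32,
    intermediate_dim=64,
)
backbone.quantize("int8")
config = backbone.get_config()
# `config["dtype"]` is now a serialized DTypePolicyMap rather than a bare
# policy object.
print(config["dtype"])
```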

keras_hub/src/models/backbone.py:

@@ -135,7 +130,8 @@ class Backbone(keras.Model):
         1. a built-in preset identifier like `'bert_base_en'`
         2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
         3. a Hugging Face handle like `'hf://user/bert_base_en'`
-        4. a path to a local preset directory like `'./bert_base_en'`
+        4. a ModelScope handle like `'modelscope://user/bert_base_en'`
+        5. a path to a local preset directory like `'./bert_base_en'`

         This constructor can be called in one of two ways. Either from the base
         class like `keras_hub.models.Backbone.from_preset()`, or from
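
A minimal sketch of the handle schemes listed above. The `user/...` repositories are placeholders taken from the docstring, not real presets.

```python
import keras_hub

# Built-in preset identifier.
backbone = keras_hub.models.Backbone.from_preset("bert_base_en")
# Remote handles; substitute a real repository for `user/bert_base_en`.
backbone = keras_hub.models.Backbone.from_preset("hf://user/bert_base_en")
backbone = keras_hub.models.Backbone.from_preset("modelscope://user/bert_base_en")
```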

keras_hub/src/models/backbone.py:

@@ -277,3 +273,19 @@ class Backbone(keras.Model):
             layer.lora_kernel_a.assign(lora_kernel_a)
             layer.lora_kernel_b.assign(lora_kernel_b)
         store.close()
+
+    def export_to_transformers(self, path):
+        """Export the backbone model to HuggingFace Transformers format.
+
+        This saves the backbone's configuration and weights in a format
+        compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_backbone,
+        )
+
+        export_backbone(self, path)
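
A minimal usage sketch, assuming the architecture is one the exporter supports (Gemma here, whose exporter is also touched in this release):

```python
import keras_hub

backbone = keras_hub.models.GemmaBackbone.from_preset("gemma_2b_en")
backbone.export_to_transformers("./gemma_backbone_hf")
# The target directory should now hold a Transformers-style config plus
# safetensors weights.
```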

keras_hub/src/models/causal_lm.py:

@@ -132,6 +132,17 @@ class CausalLM(Task):
             return self.generate_function

         self.generate_function = self.generate_step
+        if keras.config.backend() == "openvino":
+            from keras_hub.src.utils.openvino_utils import ov_infer
+
+            def wrapped_generate_function(inputs, stop_token_ids=None):
+                # Convert to numpy for OpenVINO backend
+                inputs = tree.map_structure(ops.array, inputs)
+                return ov_infer(
+                    self, inputs, stop_token_ids, self.generate_step
+                )
+
+            self.generate_function = wrapped_generate_function
         if keras.config.backend() == "torch":
             import torch
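
A minimal sketch of the new inference path, assuming an OpenVINO-enabled Keras install and a preset that the `ov_infer` helper can handle (the preset name is illustrative). The backend must be chosen before Keras is imported:

```python
import os
os.environ["KERAS_BACKEND"] = "openvino"  # inference-only backend

import keras_hub

causal_lm = keras_hub.models.CausalLM.from_preset("gpt2_base_en")
# generate() now routes through the wrapper above, which maps ops.array over
# the inputs and delegates to ov_infer().
print(causal_lm.generate("The quick brown fox", max_length=30))
```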

keras_hub/src/models/causal_lm.py:

@@ -392,3 +403,29 @@ class CausalLM(Task):
         outputs = [postprocess(x) for x in outputs]

         return self._normalize_generate_outputs(outputs, input_is_scalar)
+
+    def export_to_transformers(self, path):
+        """Export the full CausalLM model to HuggingFace Transformers format.
+
+        This exports the trainable model, tokenizer, and configurations in a
+        format compatible with HuggingFace Transformers. For unsupported model
+        architectures, a ValueError is raised.
+
+        If the preprocessor is attached (default), both the trainable model and
+        tokenizer are exported. To export only the trainable model, set
+        `self.preprocessor = None` before calling this method, then export the
+        preprocessor separately via `preprocessor.export_to_transformers(path)`.
+
+        Args:
+            path: str. Path to save the exported model.
+        """
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_to_safetensors,
+        )
+
+        export_to_safetensors(self, path)
+
+    def _post_quantize(self, mode, **kwargs):
+        super()._post_quantize(mode, **kwargs)
+        # Reset the compiled generate function.
+        self.generate_function = None
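
A minimal usage sketch mirroring the docstring above (preset name illustrative; the architecture must be supported by the exporter):

```python
import keras_hub

causal_lm = keras_hub.models.GemmaCausalLM.from_preset("gemma_2b_en")

# Default: exports both the model and the attached tokenizer.
causal_lm.export_to_transformers("./gemma_hf")

# Model-only export: detach the preprocessor, then export it separately.
preprocessor = causal_lm.preprocessor
causal_lm.preprocessor = None
causal_lm.export_to_transformers("./gemma_hf_model_only")
preprocessor.export_to_transformers("./gemma_hf_tokenizer_only")
```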

keras_hub/src/models/causal_lm_preprocessor.py:

@@ -180,3 +180,17 @@ class CausalLMPreprocessor(Preprocessor):
         self._sequence_length = value
         if self.packer is not None:
             self.packer.sequence_length = value
+
+    def export_to_transformers(self, path):
+        """Export the preprocessor to HuggingFace Transformers format.
+
+        Args:
+            path: str. Path to save the exported preprocessor/tokenizer.
+        """
+        if self.tokenizer is None:
+            raise ValueError("Preprocessor must have a tokenizer for export.")
+        from keras_hub.src.utils.transformers.export.hf_exporter import (
+            export_tokenizer,
+        )
+
+        export_tokenizer(self.tokenizer, path)
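
A minimal sketch of a standalone tokenizer export via the preprocessor (preset name illustrative):

```python
import keras_hub

preprocessor = keras_hub.models.GemmaCausalLMPreprocessor.from_preset(
    "gemma_2b_en"
)
preprocessor.export_to_transformers("./gemma_hf_tokenizer")
```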

keras_hub/src/models/clip/clip_presets.py:

@@ -11,7 +11,7 @@ backbone_presets = {
             "params": 149620934,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch16/3",
     },
     "clip_vit_base_patch32": {
         "metadata": {
@@ -22,7 +22,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_base_patch32/3",
     },
     "clip_vit_large_patch14": {
         "metadata": {
@@ -33,7 +33,7 @@ backbone_presets = {
             "params": 427616770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14/3",
     },
     "clip_vit_large_patch14_336": {
         "metadata": {
@@ -44,7 +44,7 @@ backbone_presets = {
             "params": 427944770,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_large_patch14_336/3",
     },
     "clip_vit_b_32_laion2b_s34b_b79k": {
         "metadata": {
@@ -55,7 +55,7 @@ backbone_presets = {
             "params": 151277363,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_b_32_laion2b_s34b_b79k/3",
     },
     "clip_vit_h_14_laion2b_s32b_b79k": {
         "metadata": {
@@ -66,7 +66,7 @@ backbone_presets = {
             "params": 986109698,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_h_14_laion2b_s32b_b79k/3",
     },
     "clip_vit_g_14_laion2b_s12b_b42k": {
         "metadata": {
@@ -77,7 +77,7 @@ backbone_presets = {
             "params": 1366678530,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_g_14_laion2b_s12b_b42k/3",
     },
     "clip_vit_bigg_14_laion2b_39b_b160k": {
         "metadata": {
@@ -88,6 +88,6 @@ backbone_presets = {
             "params": 2539567362,
             "path": "clip",
         },
-        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/2",
+        "kaggle_handle": "kaggle://keras/clip/keras/clip_vit_bigg_14_laion2b_39b_b160k/3",
     },
 }

keras_hub/src/models/d_fine/__init__.py (new file):

@@ -0,0 +1,5 @@
+from keras_hub.src.models.d_fine.d_fine_backbone import DFineBackbone
+from keras_hub.src.models.d_fine.d_fine_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, DFineBackbone)
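
Registering the presets against `DFineBackbone` is what makes them resolvable by name through `from_preset()`. A minimal sketch; the preset identifier below is a placeholder, the real names live in `d_fine_presets.py`:

```python
import keras_hub

# Placeholder preset name; substitute one registered in d_fine_presets.py.
detector = keras_hub.models.DFineObjectDetector.from_preset("<d_fine_preset>")
backbone = keras_hub.models.DFineBackbone.from_preset("<d_fine_preset>")
```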