keras-hub-nightly 0.16.1.dev202410020340__py3-none-any.whl → 0.19.0.dev202501260345__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +21 -3
- keras_hub/api/models/__init__.py +71 -12
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/__init__.py +2 -0
- keras_hub/src/bounding_box/converters.py +102 -12
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/reversible_embedding.py +3 -16
- keras_hub/src/layers/modeling/rms_normalization.py +36 -0
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +29 -7
- keras_hub/src/layers/preprocessing/audio_converter.py +3 -7
- keras_hub/src/layers/preprocessing/image_converter.py +170 -34
- keras_hub/src/metrics/bleu.py +4 -3
- keras_hub/src/models/albert/albert_presets.py +4 -12
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/backbone.py +3 -14
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_presets.py +3 -9
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/basnet/__init__.py +5 -0
- keras_hub/src/models/basnet/basnet.py +122 -0
- keras_hub/src/models/basnet/basnet_backbone.py +366 -0
- keras_hub/src/models/basnet/basnet_image_converter.py +8 -0
- keras_hub/src/models/basnet/basnet_preprocessor.py +14 -0
- keras_hub/src/models/basnet/basnet_presets.py +17 -0
- keras_hub/src/models/bert/bert_presets.py +14 -32
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/bloom/bloom_presets.py +8 -24
- keras_hub/src/models/causal_lm.py +56 -12
- keras_hub/src/models/clip/__init__.py +5 -0
- keras_hub/src/models/clip/clip_backbone.py +286 -0
- keras_hub/src/models/clip/clip_encoder_block.py +19 -4
- keras_hub/src/models/clip/clip_image_converter.py +8 -0
- keras_hub/src/models/clip/clip_presets.py +93 -0
- keras_hub/src/models/clip/clip_text_encoder.py +4 -1
- keras_hub/src/models/clip/clip_tokenizer.py +18 -3
- keras_hub/src/models/clip/clip_vision_embedding.py +101 -0
- keras_hub/src/models/clip/clip_vision_encoder.py +159 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +0 -109
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_presets.py +5 -15
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/__init__.py +7 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +200 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py +10 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py +16 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py +215 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +17 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +111 -0
- keras_hub/src/models/densenet/densenet_backbone.py +6 -4
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -129
- keras_hub/src/models/densenet/densenet_image_converter.py +2 -4
- keras_hub/src/models/densenet/densenet_presets.py +9 -15
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +5 -10
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/__init__.py +9 -0
- keras_hub/src/models/efficientnet/cba.py +141 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +160 -61
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +14 -0
- keras_hub/src/models/efficientnet/efficientnet_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/efficientnet/efficientnet_image_converter.py +10 -0
- keras_hub/src/models/efficientnet/efficientnet_presets.py +193 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +84 -41
- keras_hub/src/models/efficientnet/mbconv.py +53 -22
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/electra/electra_presets.py +6 -18
- keras_hub/src/models/f_net/f_net_presets.py +2 -6
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_presets.py +1 -3
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/feature_pyramid_backbone.py +1 -1
- keras_hub/src/models/flux/__init__.py +5 -0
- keras_hub/src/models/flux/flux_layers.py +496 -0
- keras_hub/src/models/flux/flux_maths.py +225 -0
- keras_hub/src/models/flux/flux_model.py +236 -0
- keras_hub/src/models/flux/flux_presets.py +3 -0
- keras_hub/src/models/flux/flux_text_to_image.py +146 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +73 -0
- keras_hub/src/models/gemma/gemma_backbone.py +35 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +29 -63
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt2/gpt2_presets.py +5 -14
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier.py +147 -2
- keras_hub/src/models/image_classifier_preprocessor.py +6 -3
- keras_hub/src/models/image_object_detector.py +87 -0
- keras_hub/src/models/image_object_detector_preprocessor.py +57 -0
- keras_hub/src/models/image_segmenter.py +0 -5
- keras_hub/src/models/image_segmenter_preprocessor.py +29 -4
- keras_hub/src/models/image_to_image.py +417 -0
- keras_hub/src/models/inpaint.py +520 -0
- keras_hub/src/models/llama/llama_backbone.py +138 -12
- keras_hub/src/models/llama/llama_causal_lm.py +3 -1
- keras_hub/src/models/llama/llama_presets.py +10 -20
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/llama3/llama3_presets.py +4 -12
- keras_hub/src/models/llama3/llama3_tokenizer.py +25 -2
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +6 -4
- keras_hub/src/models/mistral/mistral_presets.py +3 -9
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/__init__.py +6 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_backbone.py → mit/mit_backbone.py} +47 -36
- keras_hub/src/models/mit/mit_image_classifier.py +12 -0
- keras_hub/src/models/mit/mit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/mit/mit_image_converter.py +8 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_layers.py → mit/mit_layers.py} +20 -13
- keras_hub/src/models/mit/mit_presets.py +139 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +8 -8
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +0 -92
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/opt/opt_presets.py +4 -12
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +63 -17
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +3 -1
- keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +21 -23
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +2 -4
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +173 -17
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +14 -26
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -1
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_presets.py +2 -6
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +25 -11
- keras_hub/src/models/resnet/resnet_backbone.py +3 -14
- keras_hub/src/models/resnet/resnet_image_classifier.py +0 -137
- keras_hub/src/models/resnet/resnet_image_converter.py +2 -4
- keras_hub/src/models/resnet/resnet_presets.py +127 -18
- keras_hub/src/models/retinanet/__init__.py +5 -0
- keras_hub/src/models/retinanet/anchor_generator.py +52 -53
- keras_hub/src/models/retinanet/feature_pyramid.py +103 -39
- keras_hub/src/models/retinanet/non_max_supression.py +1 -0
- keras_hub/src/models/retinanet/prediction_head.py +192 -0
- keras_hub/src/models/retinanet/retinanet_backbone.py +146 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +53 -0
- keras_hub/src/models/retinanet/retinanet_label_encoder.py +49 -51
- keras_hub/src/models/retinanet/retinanet_object_detector.py +381 -0
- keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/retinanet/retinanet_presets.py +16 -0
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +6 -8
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/__init__.py +5 -0
- keras_hub/src/models/sam/sam_backbone.py +2 -3
- keras_hub/src/models/sam/sam_image_converter.py +2 -4
- keras_hub/src/models/sam/sam_image_segmenter.py +16 -16
- keras_hub/src/models/sam/sam_image_segmenter_preprocessor.py +11 -1
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_presets.py +3 -9
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/__init__.py +8 -0
- keras_hub/src/models/segformer/segformer_backbone.py +167 -0
- keras_hub/src/models/segformer/segformer_image_converter.py +8 -0
- keras_hub/src/models/segformer/segformer_image_segmenter.py +184 -0
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +31 -0
- keras_hub/src/models/segformer/segformer_presets.py +136 -0
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +8 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +577 -190
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +189 -163
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +178 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +193 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +43 -7
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +25 -14
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/t5/t5_backbone.py +5 -4
- keras_hub/src/models/t5/t5_presets.py +47 -19
- keras_hub/src/models/task.py +47 -39
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +106 -41
- keras_hub/src/models/vae/__init__.py +1 -0
- keras_hub/src/models/vae/vae_backbone.py +184 -0
- keras_hub/src/models/vae/vae_layers.py +739 -0
- keras_hub/src/models/vgg/__init__.py +5 -0
- keras_hub/src/models/vgg/vgg_backbone.py +4 -24
- keras_hub/src/models/vgg/vgg_image_classifier.py +139 -33
- keras_hub/src/models/vgg/vgg_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vgg/vgg_image_converter.py +8 -0
- keras_hub/src/models/vgg/vgg_presets.py +48 -0
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +126 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +6 -4
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +2 -4
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/whisper/whisper_presets.py +10 -30
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +2 -6
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +41 -6
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +7 -3
- keras_hub/src/tokenizers/byte_tokenizer.py +3 -10
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +9 -11
- keras_hub/src/tokenizers/tokenizer.py +10 -13
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +9 -7
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +10 -3
- keras_hub/src/utils/keras_utils.py +2 -13
- keras_hub/src/utils/pipeline_model.py +3 -3
- keras_hub/src/utils/preset_utils.py +196 -144
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_densenet.py +6 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +447 -0
- keras_hub/src/utils/timm/convert_resnet.py +1 -1
- keras_hub/src/utils/timm/convert_vgg.py +85 -0
- keras_hub/src/utils/timm/preset_loader.py +14 -9
- keras_hub/src/utils/transformers/convert_llama3.py +21 -5
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/METADATA +86 -68
- keras_hub_nightly-0.19.0.dev202501260345.dist-info/RECORD +423 -0
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/WHEEL +1 -1
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +0 -138
- keras_hub/src/models/mix_transformer/__init__.py +0 -0
- keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +0 -119
- keras_hub/src/models/stable_diffusion_3/vae_image_decoder.py +0 -320
- keras_hub_nightly-0.16.1.dev202410020340.dist-info/RECORD +0 -357
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/top_level.txt +0 -0

keras_hub/src/models/clip/clip_vision_embedding.py

@@ -0,0 +1,101 @@
+from keras import layers
+from keras import ops
+
+from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+class CLIPVisionEmbedding(layers.Layer):
+    def __init__(
+        self,
+        hidden_dim,
+        patch_size,
+        image_size,
+        data_format=None,
+        dtype=None,
+        **kwargs,
+    ):
+        super().__init__(dtype=dtype, **kwargs)
+        self.hidden_dim = int(hidden_dim)
+        self.patch_size = int(patch_size)
+        self.image_size = int(image_size)
+        data_format = standardize_data_format(data_format)
+        self.data_format = data_format
+        num_patches = (image_size // patch_size) ** 2
+        self.num_positions = num_patches + 1
+
+        self.patch_embedding = layers.Conv2D(
+            hidden_dim,
+            kernel_size=patch_size,
+            strides=patch_size,
+            data_format=data_format,
+            use_bias=False,
+            dtype=dtype,
+            name="patch_embedding",
+        )
+        self.position_embedding = layers.Embedding(
+            num_patches + 1, hidden_dim, dtype=dtype, name="position_embedding"
+        )
+
+    def build(self, input_shape):
+        self.class_embedding = self.add_weight(
+            shape=(self.hidden_dim,),
+            initializer="random_normal",
+            dtype=self.variable_dtype,
+            name="class_embedding",
+        )
+        self.position_ids = self.add_weight(
+            shape=(1, self.num_positions),
+            initializer="zeros",
+            # Let the backend determine the int dtype. For example, tf
+            # requires int64 for correct device placement, whereas jax and torch
+            # don't.
+            dtype=int,
+            trainable=False,
+            name="position_ids",
+        )
+        self.patch_embedding.build(input_shape)
+        self.position_embedding.build(self.position_ids.shape)
+
+    def call(self, inputs, training=None):
+        x = inputs
+        batch_size = ops.shape(x)[0]
+        patch_embeddings = self.patch_embedding(x, training=training)
+        if self.data_format == "channels_last":
+            patch_embeddings = ops.reshape(
+                patch_embeddings, (batch_size, -1, self.hidden_dim)
+            )
+        else:
+            patch_embeddings = ops.reshape(
+                patch_embeddings, (batch_size, self.hidden_dim, -1)
+            )
+            patch_embeddings = ops.transpose(patch_embeddings, (0, 2, 1))
+        class_embeddings = ops.expand_dims(self.class_embedding, axis=(0, 1))
+        class_embeddings = ops.tile(class_embeddings, (batch_size, 1, 1))
+        position_embeddings = self.position_embedding(self.position_ids)
+        embeddings = ops.concatenate(
+            [class_embeddings, patch_embeddings], axis=1
+        )
+        return ops.add(embeddings, position_embeddings)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "hidden_dim": self.hidden_dim,
+                "patch_size": self.patch_size,
+                "image_size": self.image_size,
+            }
+        )
+        return config
+
+    def compute_output_shape(self, input_shape):
+        output_shape = [input_shape[0], None, self.hidden_dim]
+        if self.data_format == "channels_last":
+            if input_shape[1] is not None and input_shape[2] is not None:
+                patch_num = input_shape[1] // self.patch_size
+                output_shape[1] = patch_num**2 + 1
+        else:
+            if input_shape[2] is not None and input_shape[3] is not None:
+                patch_num = input_shape[2] // self.patch_size
+                output_shape[1] = patch_num**2 + 1
+        return output_shape

keras_hub/src/models/clip/clip_vision_encoder.py

@@ -0,0 +1,159 @@
+from keras import layers
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.backbone import Backbone
+from keras_hub.src.models.clip.clip_encoder_block import CLIPEncoderBlock
+from keras_hub.src.models.clip.clip_vision_embedding import CLIPVisionEmbedding
+from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+@keras_hub_export("keras_hub.models.CLIPVisionEncoder")
+class CLIPVisionEncoder(Backbone):
+    """CLIP vision core network with hyperparameters.
+
+    Args:
+        patch_size: int. The size of each square patch in the input image.
+        hidden_dim: int. The size of the transformer hidden state at the end
+            of each transformer layer.
+        num_layers: int. The number of transformer layers.
+        num_heads: int. The number of attention heads for each transformer.
+        intermediate_dim: int. The output dimension of the first Dense layer in
+            a two-layer feedforward network for each transformer.
+        intermediate_activation: activation function. The activation that
+            is used for the first Dense layer in a two-layer feedforward network
+            for each transformer.
+        intermediate_output_index: optional int. The index of the intermediate
+            output. If specified, the output will become a dictionary with two
+            keys `"sequence_output"` and `"intermediate_output"`.
+        image_shape: tuple. The input shape without the batch size. Defaults to
+            `(224, 224, 3)`.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
+            for the models computations and weights. Note that some
+            computations, such as softmax and layer normalization will always
+            be done a float32 precision regardless of dtype.
+    """
+
+    def __init__(
+        self,
+        patch_size,
+        hidden_dim,
+        num_layers,
+        num_heads,
+        intermediate_dim,
+        intermediate_activation="quick_gelu",
+        intermediate_output_index=None,
+        image_shape=(224, 224, 3),
+        data_format=None,
+        dtype=None,
+        name=None,
+        **kwargs,
+    ):
+        data_format = standardize_data_format(data_format)
+        if data_format == "channels_last":
+            height, width = image_shape[0], image_shape[1]
+        else:
+            height, width = image_shape[1], image_shape[2]
+        if height != width:
+            raise ValueError(
+                "`CLIPVisionEncoder` expects the height and width to be the "
+                f"same in `image_shape`. Received: image_shape={image_shape}"
+            )
+
+        if (
+            intermediate_output_index is not None
+            and intermediate_output_index < 0
+        ):
+            intermediate_output_index += num_layers
+
+        # `prefix` is used to prevent duplicate name when utilizing multiple
+        # CLIP models within a single model, such as in StableDiffusion3.
+        prefix = str(name) + "_" if name is not None else ""
+
+        # === Layers ===
+        self.embedding = CLIPVisionEmbedding(
+            hidden_dim=hidden_dim,
+            patch_size=patch_size,
+            image_size=height,
+            data_format=data_format,
+            dtype=dtype,
+            name=f"{prefix}embedding",
+        )
+        self.pre_layer_norm = layers.LayerNormalization(
+            epsilon=1e-5, dtype=dtype, name=f"{prefix}pre_layer_norm"
+        )
+        self.encoder_layers = [
+            CLIPEncoderBlock(
+                hidden_dim,
+                num_heads,
+                intermediate_dim,
+                intermediate_activation,
+                use_causal_mask=False,  # `False` in the vision encoder.
+                dtype=dtype,
+                name=f"{prefix}encoder_block_{i}",
+            )
+            for i in range(num_layers)
+        ]
+        self.layer_norm = layers.LayerNormalization(
+            epsilon=1e-5, dtype=dtype, name=f"{prefix}layer_norm"
+        )
+
+        # === Functional Model ===
+        image_input = layers.Input(shape=image_shape, name="images")
+        x = self.embedding(image_input)
+        x = self.pre_layer_norm(x)
+        intermediate_output = None
+        for i, block in enumerate(self.encoder_layers):
+            x = block(x)
+            if i == intermediate_output_index:
+                intermediate_output = x
+        sequence_output = self.layer_norm(x)
+
+        if intermediate_output_index is not None:
+            outputs = {
+                "sequence_output": sequence_output,
+                "intermediate_output": intermediate_output,
+            }
+        else:
+            outputs = sequence_output
+        super().__init__(
+            inputs={"images": image_input},
+            outputs=outputs,
+            dtype=dtype,
+            name=name,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.patch_size = patch_size
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.num_heads = num_heads
+        self.intermediate_dim = intermediate_dim
+        self.intermediate_activation = intermediate_activation
+        self.intermediate_output_index = intermediate_output_index
+        self.image_shape = image_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "patch_size": self.patch_size,
+                "hidden_dim": self.hidden_dim,
+                "num_layers": self.num_layers,
+                "num_heads": self.num_heads,
+                "intermediate_dim": self.intermediate_dim,
+                "intermediate_activation": self.intermediate_activation,
+                "intermediate_output_index": self.intermediate_output_index,
+                "image_shape": self.image_shape,
+            }
+        )
+        return config

keras_hub/src/models/csp_darknet/csp_darknet_backbone.py

@@ -360,7 +360,8 @@ def apply_cross_stage_partial(
     """
 
     if name is None:
-        name = f"cross_stage_partial_{keras.backend.get_uid('cross_stage_partial')}"
+        uid = keras.backend.get_uid("cross_stage_partial")
+        name = f"cross_stage_partial_{uid}"
 
     def apply(inputs):
         hidden_channels = filters // 2

keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py

@@ -1,5 +1,3 @@
-import keras
-
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.csp_darknet.csp_darknet_backbone import (
     CSPDarkNetBackbone,
@@ -9,111 +7,4 @@ from keras_hub.src.models.image_classifier import ImageClassifier
 
 @keras_hub_export("keras_hub.models.CSPDarkNetImageClassifier")
 class CSPDarkNetImageClassifier(ImageClassifier):
-    """CSPDarkNet image classifier task model.
-
-    Args:
-        backbone: A `keras_hub.models.CSPDarkNetBackbone` instance.
-        num_classes: int. The number of classes to predict.
-        activation: `None`, str or callable. The activation function to use on
-            the `Dense` layer. Set `activation=None` to return the output
-            logits. Defaults to `"softmax"`.
-
-    To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
-    where `x` is a tensor and `y` is a integer from `[0, num_classes)`.
-    All `ImageClassifier` tasks include a `from_preset()` constructor which can
-    be used to load a pre-trained config and weights.
-
-    Examples:
-
-    Call `predict()` to run inference.
-    ```python
-    # Load preset and train
-    images = np.ones((2, 224, 224, 3), dtype="float32")
-    classifier = keras_hub.models.CSPDarkNetImageClassifier.from_preset(
-        "csp_darknet_tiny_imagenet")
-    classifier.predict(images)
-    ```
-
-    Call `fit()` on a single batch.
-    ```python
-    # Load preset and train
-    images = np.ones((2, 224, 224, 3), dtype="float32")
-    labels = [0, 3]
-    classifier = keras_hub.models.CSPDarkNetImageClassifier.from_preset(
-        "csp_darknet_tiny_imagenet")
-    classifier.fit(x=images, y=labels, batch_size=2)
-    ```
-
-    Call `fit()` with custom loss, optimizer and backbone.
-    ```python
-    classifier = keras_hub.models.CSPDarkNetImageClassifier.from_preset(
-        "csp_darknet_tiny_imagenet")
-    classifier.compile(
-        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-        optimizer=keras.optimizers.Adam(5e-5),
-    )
-    classifier.backbone.trainable = False
-    classifier.fit(x=images, y=labels, batch_size=2)
-    ```
-
-    Custom backbone.
-    ```python
-    images = np.ones((2, 224, 224, 3), dtype="float32")
-    labels = [0, 3]
-    backbone = keras_hub.models.CSPDarkNetBackbone(
-        stackwise_num_filters=[128, 256, 512, 1024],
-        stackwise_depth=[3, 9, 9, 3],
-        block_type="basic_block",
-        image_shape = (224, 224, 3),
-    )
-    classifier = keras_hub.models.CSPDarkNetImageClassifier(
-        backbone=backbone,
-        num_classes=4,
-    )
-    classifier.fit(x=images, y=labels, batch_size=2)
-    ```
-    """
-
     backbone_cls = CSPDarkNetBackbone
-
-    def __init__(
-        self,
-        backbone,
-        num_classes,
-        activation="softmax",
-        preprocessor=None,  # adding this dummy arg for saved model test
-        # TODO: once preprocessor flow is figured out, this needs to be updated
-        **kwargs,
-    ):
-        # === Layers ===
-        self.backbone = backbone
-        self.output_dense = keras.layers.Dense(
-            num_classes,
-            activation=activation,
-            name="predictions",
-        )
-
-        # === Functional Model ===
-        inputs = self.backbone.input
-        x = self.backbone(inputs)
-        outputs = self.output_dense(x)
-        super().__init__(
-            inputs=inputs,
-            outputs=outputs,
-            **kwargs,
-        )
-
-        # === Config ===
-        self.num_classes = num_classes
-        self.activation = activation
-
-    def get_config(self):
-        # Backbone serialized in `super`
-        config = super().get_config()
-        config.update(
-            {
-                "num_classes": self.num_classes,
-                "activation": self.activation,
-            }
-        )
-        return config

keras_hub/src/models/deberta_v3/deberta_v3_presets.py

@@ -8,11 +8,9 @@ backbone_presets = {
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
             "params": 70682112,
-            "official_name": "DeBERTaV3",
             "path": "deberta_v3",
-            "model_card": "https://huggingface.co/microsoft/deberta-v3-xsmall",
         },
-        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_extra_small_en/
+        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_extra_small_en/3",
     },
     "deberta_v3_small_en": {
         "metadata": {
@@ -21,11 +19,9 @@ backbone_presets = {
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
             "params": 141304320,
-            "official_name": "DeBERTaV3",
             "path": "deberta_v3",
-            "model_card": "https://huggingface.co/microsoft/deberta-v3-small",
         },
-        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_small_en/
+        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_small_en/3",
     },
     "deberta_v3_base_en": {
         "metadata": {
@@ -34,11 +30,9 @@ backbone_presets = {
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
             "params": 183831552,
-            "official_name": "DeBERTaV3",
             "path": "deberta_v3",
-            "model_card": "https://huggingface.co/microsoft/deberta-v3-base",
         },
-        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_base_en/
+        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_base_en/3",
     },
     "deberta_v3_large_en": {
         "metadata": {
@@ -47,11 +41,9 @@ backbone_presets = {
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
             "params": 434012160,
-            "official_name": "DeBERTaV3",
             "path": "deberta_v3",
-            "model_card": "https://huggingface.co/microsoft/deberta-v3-large",
         },
-        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_large_en/
+        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_large_en/3",
     },
     "deberta_v3_base_multi": {
         "metadata": {
@@ -60,10 +52,8 @@ backbone_presets = {
                 "Trained on the 2.5TB multilingual CC100 dataset."
             ),
             "params": 278218752,
-            "official_name": "DeBERTaV3",
             "path": "deberta_v3",
-            "model_card": "https://huggingface.co/microsoft/mdeberta-v3-base",
         },
-        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_base_multi/
+        "kaggle_handle": "kaggle://keras/deberta_v3/keras/deberta_v3_base_multi/3",
     },
 }

keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py

@@ -7,7 +7,7 @@ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
 from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
     deberta_kernel_initializer,
 )
-from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import (
+from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import (  # noqa: E501
     DebertaV3TextClassifierPreprocessor,
 )
 from keras_hub.src.models.text_classifier import TextClassifier
@@ -43,9 +43,9 @@ class DebertaV3TextClassifier(TextClassifier):
     Args:
         backbone: A `keras_hub.models.DebertaV3` instance.
         num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor`
-            `None`, this model will not apply preprocessing, and
-            be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor`
+            or `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
         activation: Optional `str` or callable. The
             activation function to use on the model outputs. Set
             `activation="softmax"` to return output probabilities.

keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py

@@ -45,7 +45,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
         bias_initializer: string or `keras.initializers` initializer.
             The bias initializer for the dense and disentangled
             self-attention layers. Defaults to `"zeros"`.
-    """
+    """  # noqa: E501
 
     def __init__(
         self,
@@ -58,7 +58,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
         layer_norm_epsilon=1e-05,
         kernel_initializer="glorot_uniform",
         bias_initializer="zeros",
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.intermediate_dim = intermediate_dim
@@ -145,8 +145,8 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
         """Forward pass of `DisentangledAttentionEncoder`.
 
         Args:
-            inputs: a Tensor. The input data to `DisentangledAttentionEncoder`,
-                of shape [batch_size, sequence_length, hidden_dim].
+            inputs: a Tensor. The input data to `DisentangledAttentionEncoder`,
+                should be of shape [batch_size, sequence_length, hidden_dim].
             rel_embeddings: a Tensor. The relative position embedding matrix,
                 should be of shape `[batch_size, 2 * bucket_size, hidden_dim]`.
             padding_mask: a boolean Tensor. It indicates if the token should be

keras_hub/src/models/deberta_v3/disentangled_self_attention.py

@@ -31,7 +31,7 @@ class DisentangledSelfAttention(keras.layers.Layer):
         bias_initializer: string or `keras.initializers` initializer.
             The bias initializer for the dense layers.
             Defaults to `"zeros"`.
-    """
+    """  # noqa: E501
 
     def __init__(
         self,
@@ -363,7 +363,8 @@ class DisentangledSelfAttention(keras.layers.Layer):
             training=training,
         )
 
-        # Reshape `attention_output` to `(batch_size, sequence_length, hidden_dim)`.
+        # Reshape `attention_output` to
+        # `(batch_size, sequence_length, hidden_dim)`.
         attention_output = ops.reshape(
             attention_output,
             [

keras_hub/src/models/deeplab_v3/__init__.py

@@ -0,0 +1,7 @@
+from keras_hub.src.models.deeplab_v3.deeplab_v3_backbone import (
+    DeepLabV3Backbone,
+)
+from keras_hub.src.models.deeplab_v3.deeplab_v3_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, DeepLabV3Backbone)