keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +1 -0
- keras_hub/api/models/__init__.py +11 -6
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/converters.py +2 -2
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/rms_normalization.py +8 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
- keras_hub/src/metrics/bleu.py +1 -1
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/bert/bert_presets.py +4 -2
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/causal_lm.py +19 -15
- keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
- keras_hub/src/models/densenet/densenet_backbone.py +3 -1
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +6 -6
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/cba.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
- keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
- keras_hub/src/models/efficientnet/mbconv.py +1 -1
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/flux/flux_layers.py +46 -44
- keras_hub/src/models/flux/flux_maths.py +24 -17
- keras_hub/src/models/flux/flux_model.py +24 -19
- keras_hub/src/models/flux/flux_presets.py +2 -1
- keras_hub/src/models/flux/flux_text_to_image.py +7 -3
- keras_hub/src/models/gemma/gemma_backbone.py +27 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +9 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier_preprocessor.py +4 -1
- keras_hub/src/models/image_object_detector.py +2 -2
- keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
- keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
- keras_hub/src/models/llama/llama_backbone.py +34 -26
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/mit_backbone.py +4 -3
- keras_hub/src/models/mit/mit_layers.py +2 -1
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +2 -2
- keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
- keras_hub/src/models/retinanet/prediction_head.py +2 -2
- keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
- keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
- keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +4 -2
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/sam_backbone.py +2 -2
- keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/segformer_backbone.py +18 -14
- keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
- keras_hub/src/models/segformer/segformer_presets.py +24 -12
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/task.py +4 -2
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +5 -1
- keras_hub/src/models/vae/vae_layers.py +0 -1
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +49 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +2 -2
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
- keras_hub/src/utils/preset_utils.py +25 -18
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
keras_hub/api/layers/__init__.py
CHANGED
@@ -63,6 +63,7 @@ from keras_hub.src.models.segformer.segformer_image_converter import (
|
|
63
63
|
SegFormerImageConverter,
|
64
64
|
)
|
65
65
|
from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter
|
66
|
+
from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter
|
66
67
|
from keras_hub.src.models.whisper.whisper_audio_converter import (
|
67
68
|
WhisperAudioConverter,
|
68
69
|
)
|
keras_hub/api/models/__init__.py
CHANGED
@@ -200,18 +200,18 @@ from keras_hub.src.models.image_segmenter_preprocessor import (
|
|
200
200
|
)
|
201
201
|
from keras_hub.src.models.image_to_image import ImageToImage
|
202
202
|
from keras_hub.src.models.inpaint import Inpaint
|
203
|
-
from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
|
204
|
-
from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
|
205
|
-
from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
|
206
|
-
Llama3CausalLMPreprocessor,
|
207
|
-
)
|
208
|
-
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
|
209
203
|
from keras_hub.src.models.llama.llama_backbone import LlamaBackbone
|
210
204
|
from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM
|
211
205
|
from keras_hub.src.models.llama.llama_causal_lm_preprocessor import (
|
212
206
|
LlamaCausalLMPreprocessor,
|
213
207
|
)
|
214
208
|
from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
|
209
|
+
from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
|
210
|
+
from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
|
211
|
+
from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
|
212
|
+
Llama3CausalLMPreprocessor,
|
213
|
+
)
|
214
|
+
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
|
215
215
|
from keras_hub.src.models.masked_lm import MaskedLM
|
216
216
|
from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
|
217
217
|
from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
|
@@ -330,6 +330,11 @@ from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier
|
|
330
330
|
from keras_hub.src.models.vgg.vgg_image_classifier_preprocessor import (
|
331
331
|
VGGImageClassifierPreprocessor,
|
332
332
|
)
|
333
|
+
from keras_hub.src.models.vit.vit_backbone import ViTBackbone
|
334
|
+
from keras_hub.src.models.vit.vit_image_classifier import ViTImageClassifier
|
335
|
+
from keras_hub.src.models.vit.vit_image_classifier_preprocessor import (
|
336
|
+
ViTImageClassifierPreprocessor,
|
337
|
+
)
|
333
338
|
from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone
|
334
339
|
from keras_hub.src.models.whisper.whisper_backbone import WhisperBackbone
|
335
340
|
from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
|
@@ -21,8 +21,8 @@ from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer
|
|
21
21
|
from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer
|
22
22
|
from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
|
23
23
|
from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
|
24
|
-
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
|
25
24
|
from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
|
25
|
+
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
|
26
26
|
from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
|
27
27
|
from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer
|
28
28
|
from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
|
@@ -73,8 +73,8 @@ def encode_box_to_deltas(
|
|
73
73
|
|
74
74
|
if encoding_format not in ["center_xywh", "center_yxhw"]:
|
75
75
|
raise ValueError(
|
76
|
-
"`encoding_format` should be one of 'center_xywh' or
|
77
|
-
f"got {encoding_format}"
|
76
|
+
"`encoding_format` should be one of 'center_xywh' or "
|
77
|
+
f"'center_yxhw', got {encoding_format}"
|
78
78
|
)
|
79
79
|
|
80
80
|
encoded_anchors = convert_format(
|
@@ -34,7 +34,8 @@ class MaskedLMHead(keras.layers.Layer):
|
|
34
34
|
token_embedding: Optional. A `keras_hub.layers.ReversibleEmbedding`
|
35
35
|
instance. If passed, the layer will be used to project from the
|
36
36
|
`hidden_dim` of the model to the output `vocabulary_size`.
|
37
|
-
intermediate_activation: The activation function of intermediate dense
|
37
|
+
intermediate_activation: The activation function of intermediate dense
|
38
|
+
layer.
|
38
39
|
activation: The activation function for the outputs of the layer.
|
39
40
|
Usually either `None` (return logits), or `"softmax"`
|
40
41
|
(return probabilities).
|
@@ -6,10 +6,11 @@ from keras_hub.src.api_export import keras_hub_export
|
|
6
6
|
|
7
7
|
@keras_hub_export("keras_hub.layers.RMSNormalization")
|
8
8
|
class RMSNormalization(keras.layers.Layer):
|
9
|
-
"""
|
10
|
-
|
9
|
+
"""Root Mean Square (RMS) Normalization layer.
|
10
|
+
|
11
11
|
This layer normalizes the input tensor based on its RMS value and applies
|
12
12
|
a learned scaling factor.
|
13
|
+
|
13
14
|
Args:
|
14
15
|
input_dim: int. The dimensionality of the input tensor.
|
15
16
|
"""
|
@@ -21,12 +22,13 @@ class RMSNormalization(keras.layers.Layer):
|
|
21
22
|
)
|
22
23
|
|
23
24
|
def call(self, x):
|
24
|
-
"""
|
25
|
-
|
25
|
+
"""Applies RMS normalization to the input tensor.
|
26
|
+
|
26
27
|
Args:
|
27
|
-
x:
|
28
|
+
x: Input tensor of shape (batch_size, input_dim).
|
29
|
+
|
28
30
|
Returns:
|
29
|
-
|
31
|
+
The RMS-normalized tensor of the same shape (batch_size, input_dim),
|
30
32
|
scaled by the learned `scale` parameter.
|
31
33
|
"""
|
32
34
|
x = ops.cast(x, float)
|
@@ -11,7 +11,8 @@ class RotaryEmbedding(keras.layers.Layer):
|
|
11
11
|
This layer encodes absolute positional information with a rotation
|
12
12
|
matrix. It calculates the rotary encoding with a mix of sine and
|
13
13
|
cosine functions with geometrically increasing wavelengths.
|
14
|
-
Defined and formulated in
|
14
|
+
Defined and formulated in
|
15
|
+
[RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864v4).
|
15
16
|
The input must be a tensor with a sequence dimension and a feature
|
16
17
|
dimension. Typically, this will either be an input with shape
|
17
18
|
`(batch_size, sequence_length, feature_length)` or
|
@@ -65,7 +66,7 @@ class RotaryEmbedding(keras.layers.Layer):
|
|
65
66
|
scaling_factor=1.0,
|
66
67
|
sequence_axis=1,
|
67
68
|
feature_axis=-1,
|
68
|
-
**kwargs
|
69
|
+
**kwargs,
|
69
70
|
):
|
70
71
|
super().__init__(**kwargs)
|
71
72
|
self.max_wavelength = max_wavelength
|
@@ -5,12 +5,13 @@ from keras_hub.src.api_export import keras_hub_export
|
|
5
5
|
from keras_hub.src.layers.modeling.cached_multi_head_attention import (
|
6
6
|
CachedMultiHeadAttention,
|
7
7
|
)
|
8
|
-
from keras_hub.src.
|
9
|
-
|
10
|
-
from keras_hub.src.layers.modeling.transformer_layer_utils import ( # isort:skip
|
8
|
+
from keras_hub.src.layers.modeling.transformer_layer_utils import (
|
11
9
|
compute_causal_mask,
|
10
|
+
)
|
11
|
+
from keras_hub.src.layers.modeling.transformer_layer_utils import (
|
12
12
|
merge_padding_and_attention_mask,
|
13
13
|
)
|
14
|
+
from keras_hub.src.utils.keras_utils import clone_initializer
|
14
15
|
|
15
16
|
|
16
17
|
@keras_hub_export("keras_hub.layers.TransformerDecoder")
|
@@ -265,13 +266,13 @@ class TransformerDecoder(keras.layers.Layer):
|
|
265
266
|
`[batch_size, decoder_sequence_length]`.
|
266
267
|
decoder_attention_mask: a boolean Tensor. Customized decoder
|
267
268
|
sequence mask, must be of shape
|
268
|
-
`[batch_size, decoder_sequence_length, decoder_sequence_length]
|
269
|
+
`[batch_size, decoder_sequence_length, decoder_sequence_length]`
|
269
270
|
encoder_padding_mask: a boolean Tensor, the padding mask of encoder
|
270
271
|
sequence, must be of shape
|
271
272
|
`[batch_size, encoder_sequence_length]`.
|
272
273
|
encoder_attention_mask: a boolean Tensor. Customized encoder
|
273
274
|
sequence mask, must be of shape
|
274
|
-
`[batch_size, encoder_sequence_length, encoder_sequence_length]
|
275
|
+
`[batch_size, encoder_sequence_length, encoder_sequence_length]`
|
275
276
|
self_attention_cache: a dense float Tensor. The cache of key/values
|
276
277
|
pairs in the self-attention layer. Has shape
|
277
278
|
`[batch_size, 2, max_seq_len, num_heads, key_dims]`.
|
@@ -435,7 +436,8 @@ class TransformerDecoder(keras.layers.Layer):
|
|
435
436
|
input_length = output_length = ops.shape(decoder_sequence)[1]
|
436
437
|
# We need to handle a rectangular causal mask when doing cached
|
437
438
|
# decoding. For generative inference, `decoder_sequence` will
|
438
|
-
# generally be length 1, and `cache` will be the full generation
|
439
|
+
# generally be length 1, and `cache` will be the full generation
|
440
|
+
# length.
|
439
441
|
if self_attention_cache is not None:
|
440
442
|
input_length = ops.shape(self_attention_cache)[2]
|
441
443
|
|
@@ -190,7 +190,9 @@ class TransformerEncoder(keras.layers.Layer):
|
|
190
190
|
[batch_size, sequence_length, sequence_length].
|
191
191
|
training: a boolean indicating whether the layer should behave in
|
192
192
|
training mode or in inference mode.
|
193
|
-
return_attention_scores: a boolean indicating whether the output
|
193
|
+
return_attention_scores: a boolean indicating whether the output
|
194
|
+
should be `(attention_output, attention_scores)` if `True` or
|
195
|
+
`attention_output` if `False`. Defaults to `False`.
|
194
196
|
|
195
197
|
Returns:
|
196
198
|
A Tensor of the same shape as the `inputs`.
|
keras_hub/src/metrics/bleu.py
CHANGED
@@ -164,7 +164,7 @@ class Bleu(keras.metrics.Metric):
|
|
164
164
|
return inputs
|
165
165
|
|
166
166
|
def _get_ngrams(self, segment, max_order):
|
167
|
-
"""Extracts all n-grams up to a given maximum order from an input
|
167
|
+
"""Extracts all n-grams up to a given maximum order from an input.
|
168
168
|
|
169
169
|
Uses Python ops. Inspired from
|
170
170
|
https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py.
|
@@ -20,10 +20,10 @@ from keras_hub.src.models.text_classifier import TextClassifier
|
|
20
20
|
class AlbertTextClassifier(TextClassifier):
|
21
21
|
"""An end-to-end ALBERT model for classification tasks
|
22
22
|
|
23
|
-
This model attaches a classification head to a
|
24
|
-
backbone, mapping from the backbone outputs
|
25
|
-
a classification task. For usage of this model
|
26
|
-
the `from_preset()` method.
|
23
|
+
This model attaches a classification head to a
|
24
|
+
`keras_hub.models.AlbertBackbone` backbone, mapping from the backbone outputs
|
25
|
+
to logit output suitable for a classification task. For usage of this model
|
26
|
+
with pre-trained weights, see the `from_preset()` method.
|
27
27
|
|
28
28
|
This model can optionally be configured with a `preprocessor` layer, in
|
29
29
|
which case it will automatically apply preprocessing to raw inputs during
|
@@ -36,9 +36,9 @@ class AlbertTextClassifier(TextClassifier):
|
|
36
36
|
Args:
|
37
37
|
backbone: A `keras_hub.models.AlbertBackbone` instance.
|
38
38
|
num_classes: int. Number of classes to predict.
|
39
|
-
preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or
|
40
|
-
`None`, this model will not apply preprocessing, and
|
41
|
-
be preprocessed before calling the model.
|
39
|
+
preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or
|
40
|
+
`None`. If `None`, this model will not apply preprocessing, and
|
41
|
+
inputs should be preprocessed before calling the model.
|
42
42
|
activation: Optional `str` or callable. The
|
43
43
|
activation function to use on the model outputs. Set
|
44
44
|
`activation="softmax"` to return output probabilities.
|
@@ -22,9 +22,9 @@ class BartBackbone(Backbone):
|
|
22
22
|
described in
|
23
23
|
["BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension"](https://arxiv.org/abs/1910.13461).
|
24
24
|
|
25
|
-
The default constructor gives a fully customizable, randomly initialized
|
26
|
-
model with any number of layers, heads, and embedding dimensions. To
|
27
|
-
preset architectures and weights, use the `from_preset` constructor.
|
25
|
+
The default constructor gives a fully customizable, randomly initialized
|
26
|
+
BART model with any number of layers, heads, and embedding dimensions. To
|
27
|
+
load preset architectures and weights, use the `from_preset` constructor.
|
28
28
|
|
29
29
|
Disclaimer: Pre-trained models are provided on an "as is" basis, without
|
30
30
|
warranties or conditions of any kind. The underlying model is provided by a
|
@@ -78,7 +78,7 @@ class BartBackbone(Backbone):
|
|
78
78
|
)
|
79
79
|
output = model(input_data)
|
80
80
|
```
|
81
|
-
"""
|
81
|
+
""" # noqa: E501
|
82
82
|
|
83
83
|
def __init__(
|
84
84
|
self,
|
@@ -60,7 +60,8 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
60
60
|
bart_lm.generate("The quick brown fox", max_length=30)
|
61
61
|
```
|
62
62
|
|
63
|
-
Use `generate()` with encoder inputs and an incomplete decoder input
|
63
|
+
Use `generate()` with encoder inputs and an incomplete decoder input
|
64
|
+
(prompt).
|
64
65
|
```python
|
65
66
|
bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
|
66
67
|
bart_lm.generate(
|
@@ -79,10 +80,10 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
79
80
|
prompt = {
|
80
81
|
"encoder_token_ids": np.array([[0, 133, 2119, 6219, 23602, 2, 1, 1]]),
|
81
82
|
"encoder_padding_mask": np.array(
|
82
|
-
[[
|
83
|
+
[[1, 1, 1, 1, 1, 1, 0, 0]]
|
83
84
|
),
|
84
85
|
"decoder_token_ids": np.array([[2, 0, 133, 1769, 2, 1, 1]]),
|
85
|
-
"decoder_padding_mask": np.array([[
|
86
|
+
"decoder_padding_mask": np.array([[1, 1, 1, 1, 0, 0]])
|
86
87
|
}
|
87
88
|
|
88
89
|
bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset(
|
@@ -95,7 +96,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
95
96
|
Call `fit()` on a single batch.
|
96
97
|
```python
|
97
98
|
features = {
|
98
|
-
"encoder_text": ["The quick
|
99
|
+
"encoder_text": ["The quick fox jumped.", "I forgot my homework."],
|
99
100
|
"decoder_text": ["The fast hazel fox leapt.", "I forgot my assignment."]
|
100
101
|
}
|
101
102
|
bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
|
@@ -195,7 +196,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
195
196
|
cross_attention_cache=None,
|
196
197
|
cross_attention_cache_update_index=None,
|
197
198
|
):
|
198
|
-
"""Forward pass with a key/value caches for generative decoding
|
199
|
+
"""Forward pass with a key/value caches for generative decoding.
|
199
200
|
|
200
201
|
`call_decoder_with_cache` adds an additional inference-time forward pass
|
201
202
|
for the model for seq2seq text generation. Unlike calling the model
|
@@ -241,7 +242,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
241
242
|
key/value cache in the decoder's self-attention layer and
|
242
243
|
`cross_attention_cache` is the key/value cache in the decoder's
|
243
244
|
cross-attention layer.
|
244
|
-
"""
|
245
|
+
""" # noqa: E501
|
245
246
|
# Embedding layers.
|
246
247
|
tokens = self.backbone.token_embedding(decoder_token_ids)
|
247
248
|
positions = self.backbone.decoder_position_embedding(
|
@@ -331,7 +332,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
331
332
|
def _build_cache(
|
332
333
|
self, encoder_token_ids, encoder_padding_mask, decoder_token_ids
|
333
334
|
):
|
334
|
-
"""Builds the self-attention cache and the cross-attention cache
|
335
|
+
"""Builds the self-attention cache and the cross-attention cache."""
|
335
336
|
encoder_hidden_states = self.call_encoder(
|
336
337
|
token_ids=encoder_token_ids, padding_mask=encoder_padding_mask
|
337
338
|
)
|
@@ -417,7 +418,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
|
|
417
418
|
prompt = ops.slice(prompt, [0, cache_index], [num_samples, 1])
|
418
419
|
|
419
420
|
def repeat_tensor(x):
|
420
|
-
"""Repeats
|
421
|
+
"""Repeats along batch axis to match dim for beam search."""
|
421
422
|
if ops.shape(x)[0] == num_samples:
|
422
423
|
return x
|
423
424
|
return ops.repeat(x, repeats=num_samples // batch_size, axis=0)
|
@@ -69,7 +69,8 @@ backbone_presets = {
|
|
69
69
|
"bert_base_multi": {
|
70
70
|
"metadata": {
|
71
71
|
"description": (
|
72
|
-
"12-layer BERT model where case is maintained. Trained on
|
72
|
+
"12-layer BERT model where case is maintained. Trained on "
|
73
|
+
"trained on Wikipedias of 104 languages"
|
73
74
|
),
|
74
75
|
"params": 177853440,
|
75
76
|
"path": "bert",
|
@@ -101,7 +102,8 @@ backbone_presets = {
|
|
101
102
|
"bert_tiny_en_uncased_sst2": {
|
102
103
|
"metadata": {
|
103
104
|
"description": (
|
104
|
-
"The bert_tiny_en_uncased backbone model fine-tuned on the
|
105
|
+
"The bert_tiny_en_uncased backbone model fine-tuned on the "
|
106
|
+
"SST-2 sentiment analysis dataset."
|
105
107
|
),
|
106
108
|
"params": 4385920,
|
107
109
|
"path": "bert",
|
@@ -34,9 +34,9 @@ class BertTextClassifier(TextClassifier):
|
|
34
34
|
Args:
|
35
35
|
backbone: A `keras_hub.models.BertBackbone` instance.
|
36
36
|
num_classes: int. Number of classes to predict.
|
37
|
-
preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or
|
38
|
-
`None`, this model will not apply preprocessing, and
|
39
|
-
be preprocessed before calling the model.
|
37
|
+
preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or
|
38
|
+
`None`. If `None`, this model will not apply preprocessing, and
|
39
|
+
inputs should be preprocessed before calling the model.
|
40
40
|
activation: Optional `str` or callable. The
|
41
41
|
activation function to use on the model outputs. Set
|
42
42
|
`activation="softmax"` to return output probabilities.
|
@@ -303,16 +303,18 @@ class CausalLM(Task):
|
|
303
303
|
`preprocessor`. If `preprocessor` is `None`, `inputs` should be
|
304
304
|
padded to the desired maximum length and this argument
|
305
305
|
will be ignored.
|
306
|
-
stop_token_ids: Optional. `None`, "auto", or tuple of token ids.
|
307
|
-
to "auto" which uses the
|
308
|
-
Not specifying a
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
306
|
+
stop_token_ids: Optional. `None`, "auto", or tuple of token ids.
|
307
|
+
Defaults to "auto" which uses the
|
308
|
+
`preprocessor.tokenizer.end_token_id`. Not specifying a
|
309
|
+
preprocessor will produce an error. None stops generation after
|
310
|
+
generating `max_length` tokens. You may also specify a list of
|
311
|
+
token id's the model should stop on. Note that sequences of
|
312
|
+
tokens will each be interpreted as a stop token, multi-token
|
313
|
+
stop sequences are not supported.
|
314
|
+
strip_prompt: Optional. By default, generate() returns the full
|
315
|
+
prompt followed by its completion generated by the model. If
|
316
|
+
this option is set to True, only the newly generated text is
|
317
|
+
returned.
|
316
318
|
"""
|
317
319
|
# Setup our three main passes.
|
318
320
|
# 1. Optionally preprocessing strings to dense integer tensors.
|
@@ -322,10 +324,11 @@ class CausalLM(Task):
|
|
322
324
|
|
323
325
|
if self.preprocessor is None and stop_token_ids == "auto":
|
324
326
|
raise ValueError(
|
325
|
-
|
326
|
-
"Currently `preprocessor=None`. To
|
327
|
-
"
|
328
|
-
"`
|
327
|
+
"A `preprocessor` must be attached to the model if "
|
328
|
+
'`stop_token_ids="auto"`. Currently `preprocessor=None`. To '
|
329
|
+
"call `generate()` with preprocessing detached, either pass "
|
330
|
+
"`stop_token_ids=None` to always generate until `max_length` "
|
331
|
+
"or pass a tuple of token ids that should terminate generation "
|
329
332
|
"as `stop_token_ids`."
|
330
333
|
)
|
331
334
|
elif stop_token_ids == "auto":
|
@@ -365,7 +368,8 @@ class CausalLM(Task):
|
|
365
368
|
y["padding_mask"] = ops.logical_xor(
|
366
369
|
roll_sequence(prompt_mask), roll_sequence(x["padding_mask"])
|
367
370
|
)
|
368
|
-
# we assume the mask is enough and there is no need to zero-out the
|
371
|
+
# we assume the mask is enough and there is no need to zero-out the
|
372
|
+
# values
|
369
373
|
y["token_ids"] = roll_sequence(x["token_ids"])
|
370
374
|
|
371
375
|
return y
|
@@ -360,7 +360,8 @@ def apply_cross_stage_partial(
|
|
360
360
|
"""
|
361
361
|
|
362
362
|
if name is None:
|
363
|
-
|
363
|
+
uid = keras.backend.get_uid("cross_stage_partial")
|
364
|
+
name = f"cross_stage_partial_{uid}"
|
364
365
|
|
365
366
|
def apply(inputs):
|
366
367
|
hidden_channels = filters // 2
|
@@ -7,7 +7,7 @@ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
|
|
7
7
|
from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
|
8
8
|
deberta_kernel_initializer,
|
9
9
|
)
|
10
|
-
from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import (
|
10
|
+
from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( # noqa: E501
|
11
11
|
DebertaV3TextClassifierPreprocessor,
|
12
12
|
)
|
13
13
|
from keras_hub.src.models.text_classifier import TextClassifier
|
@@ -43,9 +43,9 @@ class DebertaV3TextClassifier(TextClassifier):
|
|
43
43
|
Args:
|
44
44
|
backbone: A `keras_hub.models.DebertaV3` instance.
|
45
45
|
num_classes: int. Number of classes to predict.
|
46
|
-
preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor`
|
47
|
-
`None`, this model will not apply preprocessing, and
|
48
|
-
be preprocessed before calling the model.
|
46
|
+
preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor`
|
47
|
+
or `None`. If `None`, this model will not apply preprocessing, and
|
48
|
+
inputs should be preprocessed before calling the model.
|
49
49
|
activation: Optional `str` or callable. The
|
50
50
|
activation function to use on the model outputs. Set
|
51
51
|
`activation="softmax"` to return output probabilities.
|
@@ -45,7 +45,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
|
|
45
45
|
bias_initializer: string or `keras.initializers` initializer.
|
46
46
|
The bias initializer for the dense and disentangled
|
47
47
|
self-attention layers. Defaults to `"zeros"`.
|
48
|
-
"""
|
48
|
+
""" # noqa: E501
|
49
49
|
|
50
50
|
def __init__(
|
51
51
|
self,
|
@@ -58,7 +58,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
|
|
58
58
|
layer_norm_epsilon=1e-05,
|
59
59
|
kernel_initializer="glorot_uniform",
|
60
60
|
bias_initializer="zeros",
|
61
|
-
**kwargs
|
61
|
+
**kwargs,
|
62
62
|
):
|
63
63
|
super().__init__(**kwargs)
|
64
64
|
self.intermediate_dim = intermediate_dim
|
@@ -145,8 +145,8 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
|
|
145
145
|
"""Forward pass of `DisentangledAttentionEncoder`.
|
146
146
|
|
147
147
|
Args:
|
148
|
-
inputs: a Tensor. The input data to `DisentangledAttentionEncoder`,
|
149
|
-
of shape [batch_size, sequence_length, hidden_dim].
|
148
|
+
inputs: a Tensor. The input data to `DisentangledAttentionEncoder`,
|
149
|
+
should be of shape [batch_size, sequence_length, hidden_dim].
|
150
150
|
rel_embeddings: a Tensor. The relative position embedding matrix,
|
151
151
|
should be of shape `[batch_size, 2 * bucket_size, hidden_dim]`.
|
152
152
|
padding_mask: a boolean Tensor. It indicates if the token should be
|
@@ -31,7 +31,7 @@ class DisentangledSelfAttention(keras.layers.Layer):
|
|
31
31
|
bias_initializer: string or `keras.initializers` initializer.
|
32
32
|
The bias initializer for the dense layers.
|
33
33
|
Defaults to `"zeros"`.
|
34
|
-
"""
|
34
|
+
""" # noqa: E501
|
35
35
|
|
36
36
|
def __init__(
|
37
37
|
self,
|
@@ -363,7 +363,8 @@ class DisentangledSelfAttention(keras.layers.Layer):
|
|
363
363
|
training=training,
|
364
364
|
)
|
365
365
|
|
366
|
-
# Reshape `attention_output` to
|
366
|
+
# Reshape `attention_output` to
|
367
|
+
# `(batch_size, sequence_length, hidden_dim)`.
|
367
368
|
attention_output = ops.reshape(
|
368
369
|
attention_output,
|
369
370
|
[
|
@@ -12,10 +12,10 @@ class DeepLabV3Backbone(Backbone):
|
|
12
12
|
"""DeepLabV3 & DeepLabV3Plus architecture for semantic segmentation.
|
13
13
|
|
14
14
|
This class implements a DeepLabV3 & DeepLabV3Plus architecture as described
|
15
|
-
in [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](
|
16
|
-
|
17
|
-
and [Rethinking Atrous Convolution for Semantic Image Segmentation](
|
18
|
-
|
15
|
+
in [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)
|
16
|
+
(ECCV 2018)
|
17
|
+
and [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)
|
18
|
+
(CVPR 2017)
|
19
19
|
|
20
20
|
Args:
|
21
21
|
image_encoder: `keras.Model`. An instance that is used as a feature
|
@@ -35,20 +35,23 @@ class DeepLabV3Backbone(Backbone):
|
|
35
35
|
layer resolution should match with the `low_level_feature`s layer
|
36
36
|
resolution to concatenate both the layers for combined encoder
|
37
37
|
outputs.
|
38
|
-
dilation_rates: list. A `list` of integers for parallel dilated conv
|
39
|
-
|
38
|
+
dilation_rates: list. A `list` of integers for parallel dilated conv
|
39
|
+
applied to `SpatialPyramidPooling`. Usually a
|
40
40
|
sample choice of rates is `[6, 12, 18]`.
|
41
|
-
low_level_feature_key: str optional. A layer level to extract the
|
42
|
-
from one of the key from the `image_encoder`s
|
43
|
-
property such as "P2", "P3" etc which will be the
|
44
|
-
Required only when the DeepLabV3Plus architecture
|
41
|
+
low_level_feature_key: str optional. A layer level to extract the
|
42
|
+
feature from one of the keys of the `image_encoder`s
|
43
|
+
`pyramid_outputs` property such as "P2", "P3" etc which will be the
|
44
|
+
Decoder block. Required only when the DeepLabV3Plus architecture
|
45
|
+
needs to be applied.
|
45
46
|
image_shape: tuple. The input shape without the batch size.
|
46
47
|
Defaults to `(None, None, 3)`.
|
47
48
|
|
48
49
|
Example:
|
49
50
|
```python
|
50
51
|
# Load a trained backbone to extract features from its `pyramid_outputs`.
|
51
|
-
image_encoder = keras_hub.models.ResNetBackbone.from_preset(
|
52
|
+
image_encoder = keras_hub.models.ResNetBackbone.from_preset(
|
53
|
+
"resnet_50_imagenet"
|
54
|
+
)
|
52
55
|
|
53
56
|
model = keras_hub.models.DeepLabV3Backbone(
|
54
57
|
image_encoder=image_encoder,
|
@@ -59,7 +62,7 @@ class DeepLabV3Backbone(Backbone):
|
|
59
62
|
dilation_rates = [6, 12, 18]
|
60
63
|
)
|
61
64
|
```
|
62
|
-
"""
|
65
|
+
""" # noqa: E501
|
63
66
|
|
64
67
|
def __init__(
|
65
68
|
self,
|
@@ -74,7 +77,8 @@ class DeepLabV3Backbone(Backbone):
|
|
74
77
|
):
|
75
78
|
if not isinstance(image_encoder, keras.Model):
|
76
79
|
raise ValueError(
|
77
|
-
"Argument `image_encoder` must be a `keras.Model` instance.
|
80
|
+
"Argument `image_encoder` must be a `keras.Model` instance. "
|
81
|
+
"Received instead "
|
78
82
|
f"{image_encoder} (of type {type(image_encoder)})."
|
79
83
|
)
|
80
84
|
data_format = keras.config.image_data_format()
|
@@ -4,9 +4,10 @@ backbone_presets = {
|
|
4
4
|
"deeplab_v3_plus_resnet50_pascalvoc": {
|
5
5
|
"metadata": {
|
6
6
|
"description": (
|
7
|
-
"DeepLabV3+ model with ResNet50 as image encoder and trained
|
8
|
-
"augmented Pascal VOC dataset by Semantic Boundaries
|
9
|
-
"which is having categorical accuracy of 90.01
|
7
|
+
"DeepLabV3+ model with ResNet50 as image encoder and trained "
|
8
|
+
"on augmented Pascal VOC dataset by Semantic Boundaries "
|
9
|
+
"Dataset(SBD) which is having categorical accuracy of 90.01 "
|
10
|
+
"and 0.63 Mean IoU."
|
10
11
|
),
|
11
12
|
"params": 39190656,
|
12
13
|
"path": "deeplab_v3",
|
@@ -4,7 +4,7 @@ from keras_hub.src.api_export import keras_hub_export
|
|
4
4
|
from keras_hub.src.models.deeplab_v3.deeplab_v3_backbone import (
|
5
5
|
DeepLabV3Backbone,
|
6
6
|
)
|
7
|
-
from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import (
|
7
|
+
from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import ( # noqa: E501
|
8
8
|
DeepLabV3ImageSegmenterPreprocessor,
|
9
9
|
)
|
10
10
|
from keras_hub.src.models.image_segmenter import ImageSegmenter
|