keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +1 -0
- keras_hub/api/models/__init__.py +11 -6
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/converters.py +2 -2
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/rms_normalization.py +8 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
- keras_hub/src/metrics/bleu.py +1 -1
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/bert/bert_presets.py +4 -2
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/causal_lm.py +19 -15
- keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
- keras_hub/src/models/densenet/densenet_backbone.py +3 -1
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +6 -6
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/cba.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
- keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
- keras_hub/src/models/efficientnet/mbconv.py +1 -1
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/flux/flux_layers.py +46 -44
- keras_hub/src/models/flux/flux_maths.py +24 -17
- keras_hub/src/models/flux/flux_model.py +24 -19
- keras_hub/src/models/flux/flux_presets.py +2 -1
- keras_hub/src/models/flux/flux_text_to_image.py +7 -3
- keras_hub/src/models/gemma/gemma_backbone.py +27 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +9 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier_preprocessor.py +4 -1
- keras_hub/src/models/image_object_detector.py +2 -2
- keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
- keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
- keras_hub/src/models/llama/llama_backbone.py +34 -26
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/mit_backbone.py +4 -3
- keras_hub/src/models/mit/mit_layers.py +2 -1
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +2 -2
- keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
- keras_hub/src/models/retinanet/prediction_head.py +2 -2
- keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
- keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
- keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +4 -2
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/sam_backbone.py +2 -2
- keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/segformer_backbone.py +18 -14
- keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
- keras_hub/src/models/segformer/segformer_presets.py +24 -12
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/task.py +4 -2
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +5 -1
- keras_hub/src/models/vae/vae_layers.py +0 -1
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +49 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +2 -2
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
- keras_hub/src/utils/preset_utils.py +25 -18
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
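The most substantial change in this nightly is the new ViT model family under keras_hub/src/models/vit/ plus a Hugging Face weight converter (keras_hub/src/utils/transformers/convert_vit.py). As a rough sketch of how the new classifier would be used through the generic task API (the preset name below is illustrative, not taken from this diff):

```python
# Sketch only: the preset name is a guess at the naming scheme in
# vit_presets.py; check that file for the names actually shipped.
import numpy as np
import keras_hub

classifier = keras_hub.models.ImageClassifier.from_preset(
    "vit_base_patch16_224_imagenet"  # hypothetical preset name
)
images = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype("float32")
scores = classifier.predict(images)  # typically shape (1, num_classes)
```

The selected hunks below are reconstructed from the diff viewer output.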
keras_hub/src/models/image_segmenter_preprocessor.py:

@@ -23,7 +23,8 @@ class ImageSegmenterPreprocessor(Preprocessor):
  is set to `True` this will be resized to input image shape else will be
  passed through unaltered.
  - `sample_weight`: (Optional) Will be passed through unaltered.
- - `resize_output_mask` bool: If set to `True` the output mask will be
+ - `resize_output_mask` bool: If set to `True` the output mask will be
+ resized to the same size as the input image. Defaults to `False`.

  The layer will output either `x`, an `(x, y)` tuple if labels were provided,
  or an `(x, y, sample_weight)` tuple if labels and sample weight were

@@ -77,7 +78,6 @@ class ImageSegmenterPreprocessor(Preprocessor):
  x = self.image_converter(x)

  if y is not None and self.image_converter and self.resize_output_mask:
-
  y = keras.layers.Resizing(
  height=(
  self.image_converter.image_size[0]
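The docstring fix above describes the `resize_output_mask` flag. A minimal sketch of the behavior it documents, assuming the base `ImageSegmenterPreprocessor` export and an illustrative preset name:

```python
# Sketch only: the preset name is illustrative; resize_output_mask is the
# flag documented in the hunk above.
import numpy as np
import keras_hub

preprocessor = keras_hub.models.ImageSegmenterPreprocessor.from_preset(
    "deeplab_v3_plus_resnet50_pascalvoc",  # illustrative preset name
    resize_output_mask=True,
)
x = np.random.uniform(0, 255, size=(1, 512, 512, 3)).astype("float32")
y = np.zeros((1, 512, 512, 1), dtype="int32")
# With resize_output_mask=True the mask `y` is resized along with the image;
# with the default False it is passed through unaltered.
x_out, y_out = preprocessor(x, y)
```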
keras_hub/src/models/llama/llama_backbone.py:

@@ -34,17 +34,18 @@ class LlamaBackbone(Backbone):
  num_layers (int): The number of transformer layers.
  num_query_heads (int): The number of query attention heads for
  each transformer.
- hidden_dim (int): The size of the transformer encoding and pooling
-
-
-
-
-
-
-
-
-
-
+ hidden_dim (int): The size of the transformer encoding and pooling
+ layers.
+ intermediate_dim (int): The output dimension of the first Dense layer in
+ a three-layer feedforward network for each transformer.
+ num_key_value_heads (int): The number of key and value attention heads
+ for each transformer.
+ rope_max_wavelength (int, optional): The maximum angular wavelength of
+ the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
+ rope_scaling_factor (float, optional): The scaling factor for
+ calculation of roatary embedding. Defaults to `1.0`.
+ layer_norm_epsilon (float, optional): Epsilon for the layer
+ normalization layers in the transformer decoder. Defaults to `1e-6`.
  dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
  for model computations and weights. Note that some computations,
  such as softmax and layer normalization, will always be done at

@@ -190,7 +191,8 @@ class LlamaBackbone(Backbone):

  Example:
  ```
- # Feel free to change the mesh shape to balance data and model
+ # Feel free to change the mesh shape to balance data and model
+ # parallelism
  mesh = keras.distribution.DeviceMesh(
  shape=(1, 8),
  axis_names=('batch', 'model'),

@@ -210,12 +212,16 @@ class LlamaBackbone(Backbone):
  llama_model = keras_hub.models.LlamaCausalLM.from_preset()
  ```

- To see how the layout map was applied, load the model then run
+ To see how the layout map was applied, load the model then run
+ (for one decoder block):
  ```
  embedding_layer = llama_model.backbone.get_layer("token_embedding")
  decoder_block_1 = llama_model.backbone.get_layer('transformer_layer_0')
  for variable in embedding_layer.weights + decoder_block_1.weights:
- print(
+ print(
+ f'{variable.path:<58} {str(variable.shape):<16} '
+ f'{str(variable.value.sharding.spec)}'
+ )
  ```

  Args:

@@ -230,22 +236,24 @@ class LlamaBackbone(Backbone):
  for all the model weights.
  """
  # The weight path and shape of the Llama backbone is like below
- # token_embedding/embeddings
+ # token_embedding/embeddings (128256, 2048)
  # repeat block for decoder
- # transformer_layer_0/self_attention/query/kernel
- # transformer_layer_0/self_attention/key/kernel
- # transformer_layer_0/self_attention/value/kernel
- # transformer_layer_0/self_attention/attention_output/kernel
- #
- # transformer_layer_0/
- # transformer_layer_0/
- #
- # transformer_layer_0/
+ # transformer_layer_0/self_attention/query/kernel (2048, 32, 64)
+ # transformer_layer_0/self_attention/key/kernel (2048, 8, 64)
+ # transformer_layer_0/self_attention/value/kernel (2048, 8, 64)
+ # transformer_layer_0/self_attention/attention_output/kernel
+ # (32, 64, 2048)
+ # transformer_layer_0/self_attention_layernorm/scale (2048,)
+ # transformer_layer_0/feedforward_intermediate_dense/kernel
+ # (2048, 8192)
+ # transformer_layer_0/feedforward_gate_dense/kernel (2048, 8192)
+ # transformer_layer_0/feedforward_output_dense/kerne (8192, 2048)
+ # transformer_layer_0/feedforward_layernorm/scale (2048,)

  if not isinstance(device_mesh, keras.distribution.DeviceMesh):
  raise ValueError(
- "Invalid device_mesh type. Expected
- f" got {type(device_mesh)}"
+ "Invalid device_mesh type. Expected "
+ f"`keras.distribution.Device`, got {type(device_mesh)}"
  )
  if model_parallel_dim_name not in device_mesh.axis_names:
  raise ValueError(
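The rewrapped example above belongs to `LlamaBackbone.get_layout_map`. Assembled from those fragments, a sketch of the full flow might look like this (the preset name is illustrative, an 8-device host and the JAX backend are assumed, and the keyword-only `ModelParallel` signature of recent Keras 3 releases is used):

```python
# Sketch assembled from the docstring above; the preset name, mesh shape,
# and backend are assumptions, not part of the diff.
import keras
import keras_hub

# Feel free to change the mesh shape to balance data and model parallelism.
mesh = keras.distribution.DeviceMesh(
    shape=(1, 8), axis_names=("batch", "model")
)
layout_map = keras_hub.models.LlamaBackbone.get_layout_map(mesh)
distribution = keras.distribution.ModelParallel(
    layout_map=layout_map, batch_dim_name="batch"
)
keras.distribution.set_distribution(distribution)

llama_model = keras_hub.models.LlamaCausalLM.from_preset("llama2_7b_en")

# Inspect how the layout map was applied to one decoder block
# (variable.value.sharding is JAX-specific).
embedding_layer = llama_model.backbone.get_layer("token_embedding")
decoder_block_1 = llama_model.backbone.get_layer("transformer_layer_0")
for variable in embedding_layer.weights + decoder_block_1.weights:
    print(
        f"{variable.path:<58} {str(variable.shape):<16} "
        f"{str(variable.value.sharding.spec)}"
    )
```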
keras_hub/src/models/llama3/llama3_backbone.py:

@@ -24,17 +24,18 @@ class Llama3Backbone(LlamaBackbone):
  num_layers (int): The number of transformer layers.
  num_query_heads (int): The number of query attention heads for
  each transformer.
- hidden_dim (int): The size of the transformer encoding and pooling
-
-
-
-
-
-
-
-
-
-
+ hidden_dim (int): The size of the transformer encoding and pooling
+ layers.
+ intermediate_dim (int): The output dimension of the first Dense layer in
+ a three-layer feedforward network for each transformer.
+ num_key_value_heads (int): The number of key and value attention heads
+ fo each transformer.
+ rope_max_wavelength (int, optional): The maximum angular wavelength of
+ the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
+ rope_scaling_factor (float, optional): The scaling factor for
+ calculation of roatary embedding. Defaults to `1.0`.
+ layer_norm_epsilon (float, optional): Epsilon for the layer
+ normalization layers in the transformer decoder. Defaults to `1e-6`.
  dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
  for model computations and weights. Note that some computations,
  such as softmax and layer normalization, will always be done at

keras_hub/src/models/llama3/llama3_causal_lm.py:

@@ -1,9 +1,9 @@
  from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM
  from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
  from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
  Llama3CausalLMPreprocessor,
  )
- from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM


  @keras_hub_export("keras_hub.models.Llama3CausalLM")
keras_hub/src/models/mistral/mistral_backbone.py:

@@ -38,22 +38,23 @@ class MistralBackbone(Backbone):
  num_layers (int): The number of transformer layers.
  num_query_heads (int): The number of query attention heads for
  each transformer.
- hidden_dim (int): The size of the transformer encoding and pooling
-
-
-
-
-
-
-
-
-
-
+ hidden_dim (int): The size of the transformer encoding and pooling
+ layers.
+ intermediate_dim (int): The output dimension of the first Dense layer
+ in a three-layer feedforward network for each transformer.
+ num_key_value_heads (int): The number of key and value attention heads
+ for each transformer.
+ rope_max_wavelength (int, optional): The maximum angular wavelength of
+ the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
+ rope_scaling_factor (float, optional): The scaling factor for
+ calculation of roatary embedding. Defaults to `1.0`.
+ layer_norm_epsilon (float, optional): Epsilon for the layer
+ normalization layers in the transformer decoder. Defaults to `1e-6`.
  sliding_window (int, optional): The sliding window for the mistral
- attention layers. This controls the maximum cache size for the
- layers in each transformer decoder. Only `sliding_window`
- are saved in the cache and used to generate the
- Defaults to `512`.
+ attention layers. This controls the maximum cache size for the
+ attention layers in each transformer decoder. Only `sliding_window`
+ number of tokens are saved in the cache and used to generate the
+ next token. Defaults to `512`.
  dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
  for model computations and weights. Note that some computations,
  such as softmax and layer normalization, will always be done at
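The arguments documented in the hunk above map one-to-one onto the backbone constructor. A tiny randomly initialized instance, purely to sketch the call signature (the sizes are arbitrary and do not correspond to any released Mistral configuration):

```python
# Sketch only: arbitrary toy sizes.
import numpy as np
import keras_hub

backbone = keras_hub.models.MistralBackbone(
    vocabulary_size=1000,
    num_layers=2,
    num_query_heads=8,
    hidden_dim=64,
    intermediate_dim=128,
    num_key_value_heads=2,
    sliding_window=256,  # max tokens kept in the attention cache
)
outputs = backbone(
    {
        "token_ids": np.ones((1, 12), dtype="int32"),
        "padding_mask": np.ones((1, 12), dtype="int32"),
    }
)
print(outputs.shape)  # (1, 12, 64): (batch, sequence, hidden_dim)
```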
keras_hub/src/models/mistral/mistral_causal_lm.py:

@@ -28,9 +28,9 @@ class MistralCausalLM(CausalLM):

  Args:
  backbone: A `keras_hub.models.MistralBackbone` instance.
- preprocessor: A `keras_hub.models.MistralCausalLMPreprocessor` or
- If `None`, this model will not apply preprocessing, and
- should be preprocessed before calling the model.
+ preprocessor: A `keras_hub.models.MistralCausalLMPreprocessor` or
+ `None`. If `None`, this model will not apply preprocessing, and
+ inputs should be preprocessed before calling the model.
  """

  backbone_cls = MistralBackbone

keras_hub/src/models/mistral/mistral_transformer_decoder.py:

@@ -215,7 +215,8 @@ class MistralTransformerDecoder(keras.layers.Layer):
  # Mistral uses a banded attention mask if sliding window is not None
  if self.sliding_window is not None:
  # Below is a workaround for `ops.triu` for Keras 2.
- # TODO(tirthasheshpatel): Use `ops.triu` once Keras 2 support is
+ # TODO(tirthasheshpatel): Use `ops.triu` once Keras 2 support is
+ # removed.
  # causal_mask = ops.triu(causal_mask, k=-self.sliding_window)
  i = ops.arange(output_length)[:, None] + cache_update_index
  j = ops.arange(input_length)[None, :]
keras_hub/src/models/mit/mit_backbone.py:

@@ -43,8 +43,8 @@ class MiTBackbone(FeaturePyramidBackbone):
  https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/classification/mix_transformer)

  Args:
- layerwise_depths: The number of transformer encoders to be used per
- network.
+ layerwise_depths: The number of transformer encoders to be used per
+ layer in the network.
  num_layers: int. The number of Transformer layers.
  layerwise_num_heads: list of integers, the number of heads to use
  in the attention computation for each layer.

@@ -58,7 +58,8 @@ class MiTBackbone(FeaturePyramidBackbone):
  image_shape: optional shape tuple, defaults to (None, None, 3).
  hidden_dims: the embedding dims per hierarchical layer, used as
  the levels of the feature pyramid.
- patch_sizes: list of integers, the patch_size to apply for each
+ patch_sizes: list of integers, the patch_size to apply for each
+ layer.
  strides: list of integers, stride to apply for each layer.

  Examples:

keras_hub/src/models/mit/mit_layers.py:

@@ -80,7 +80,8 @@ class HierarchicalTransformerEncoder(keras.layers.Layer):
  `LayerNormalization` layers. Defaults to `1e-06`
  sr_ratio: integer, the ratio to use within
  `SegFormerMultiheadAttention`. If set to > 1, a `Conv2D`
-
+ layer is used to reduce the length of the sequence.
+ Defaults to `1`.
  """

  def __init__(
keras_hub/src/models/mobilenet/mobilenet_backbone.py:

@@ -47,11 +47,11 @@ class MobileNetBackbone(Backbone):
  of filters in each layer.
  - If `depth_multiplier` > 1.0, proportionally increases the number
  of filters in each layer.
- - If `depth_multiplier` = 1, default number of filters from the
- are used at each layer.
+ - If `depth_multiplier` = 1, default number of filters from the
+ paper are used at each layer.
  input_num_filters: number of filters in first convolution layer
- output_num_filters: specifies whether to add conv and batch_norm in the
- if set to None, it will not add these layers in the end.
+ output_num_filters: specifies whether to add conv and batch_norm in the
+ end, if set to None, it will not add these layers in the end.
  'None' for MobileNetV1
  input_activation: activation function to be used in the input layer
  'hard_swish' for MobileNetV3,

@@ -365,7 +365,7 @@ def apply_depthwise_conv_block(
  batch normalization and relu6 activation.

  Args:
- x: Input tensor of shape `(rows, cols, channels)
+ x: Input tensor of shape `(rows, cols, channels)`
  filters: Integer, the dimensionality of the output space
  (i.e. the number of output filters in the pointwise convolution).
  depth_multiplier: controls the width of the network.

@@ -383,8 +383,8 @@ def apply_depthwise_conv_block(
  block_id: Integer, a unique identification designating the block number.

  Input shape:
- 4D tensor with shape
- 4D tensor with shape
+ 4D tensor with shape `(batch, rows, cols, channels)` in "channels_last"
+ 4D tensor with shape `(batch, channels, rows, cols)` in "channels_first"
  Returns:
  Output tensor of block.
  """
keras_hub/src/models/opt/opt_causal_lm.py:

@@ -171,8 +171,8 @@ class OPTCausalLM(CausalLM):
  Args:
  token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
  cache: a dense float Tensor, the cache of key and value.
- cache_update_index: int, or int Tensor. The index of current inputs
- whole sequence.
+ cache_update_index: int, or int Tensor. The index of current inputs
+ in the whole sequence.

  Returns:
  A (logits, hidden_states, cache) tuple. Where `logits` is the
keras_hub/src/models/pali_gemma/pali_gemma_backbone.py:

@@ -68,8 +68,8 @@ class PaliGemmaBackbone(Backbone):
  `hidden_dim / num_query_heads`. Defaults to `True`.
  use_post_ffw_norm: boolean. Whether to normalize after the feedforward
  block. Defaults to `False`.
- use_post_attention_norm: boolean. Whether to normalize after the
- block. Defaults to `False`.
+ use_post_attention_norm: boolean. Whether to normalize after the
+ attention block. Defaults to `False`.
  attention_logit_soft_cap: `None` or int. Soft cap for the attention
  logits. Defaults to `None`.
  final_logit_soft_cap: `None` or int. Soft cap for the final logits.

@@ -300,7 +300,9 @@ class PaliGemmaBackbone(Backbone):
  "final_logit_soft_cap": self.final_logit_soft_cap,
  "attention_logit_soft_cap": self.attention_logit_soft_cap,
  "sliding_window_size": self.sliding_window_size,
- "use_sliding_window_attention":
+ "use_sliding_window_attention": (
+ self.use_sliding_window_attention
+ ),
  "layer_norm_epsilon": self.layer_norm_epsilon,
  "dropout": self.dropout,
  }
keras_hub/src/models/pali_gemma/pali_gemma_vit.py:

@@ -61,7 +61,7 @@ class PaliGemmaVitEmbeddings(keras.layers.Layer):

  class PaliGemmaVitAttention(keras.layers.Layer):
  """
- Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/clip/modeling_clip.py
+ Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/clip/modeling_clip.py
  """

  def __init__(

@@ -120,7 +120,7 @@ class PaliGemmaVitAttention(keras.layers.Layer):

  def _transpose_for_scores(self, tensor, batch_size):
  """
- Adapted from https://github.com/huggingface/transformers/blob/8e164c5400b7b413c7b8fb32e35132001effc970/src/transformers/models/bert/modeling_tf_bert.py#L252
+ Adapted from https://github.com/huggingface/transformers/blob/8e164c5400b7b413c7b8fb32e35132001effc970/src/transformers/models/bert/modeling_tf_bert.py#L252
  """
  # [batch_size, seq_len, all_head_dim] ->
  # [batch_size, seq_len, num_heads, head_dim]
keras_hub/src/models/phi3/phi3_decoder.py:

@@ -53,7 +53,6 @@ class Phi3Decoder(keras.layers.Layer):
  self.kernel_initializer = keras.initializers.get(kernel_initializer)

  def build(self, decoder_sequence_shape):
-
  # Pre-attention layernorm.
  self.pre_attention_layernorm = Phi3LayerNorm(
  epsilon=self.layer_norm_epsilon,

keras_hub/src/models/phi3/phi3_rotary_embedding.py:

@@ -43,7 +43,7 @@ class Phi3SuScaledRotaryEmbedding(RotaryEmbedding):
  max_sequence_length=4096,
  pretraining_sequence_length=4096,
  max_wavelength=10000,
- **kwargs
+ **kwargs,
  ):
  super().__init__(max_wavelength=max_wavelength, **kwargs)
  self.max_sequence_length = max_sequence_length
keras_hub/src/models/preprocessor.py:

@@ -161,12 +161,12 @@ class Preprocessor(PreprocessingLayer):
  Examples:
  ```python
  # Load a preprocessor for Gemma generation.
- preprocessor = keras_hub.models.
+ preprocessor = keras_hub.models.CausalLMPreprocessor.from_preset(
  "gemma_2b_en",
  )

  # Load a preprocessor for Bert classification.
- preprocessor = keras_hub.models.
+ preprocessor = keras_hub.models.TextClassifierPreprocessor.from_preset(
  "bert_base_en",
  )
  ```
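The restored lines above complete the `from_preset` calls in the `Preprocessor` docstring. As a usage sketch, the loaded preprocessor can then be called directly on strings (this assumes the `gemma_2b_en` preset assets are available locally or downloadable):

```python
# Sketch only: follows the docstring example completed in the hunk above.
import keras_hub

preprocessor = keras_hub.models.CausalLMPreprocessor.from_preset(
    "gemma_2b_en",
    sequence_length=64,
)
# For causal LM preprocessors, calling the layer returns model inputs,
# shifted next-token targets, and a sample-weight mask.
x, y, sample_weight = preprocessor("The quick brown fox jumped.")
```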
keras_hub/src/models/retinanet/feature_pyramid.py:

@@ -9,8 +9,9 @@ class FeaturePyramid(keras.layers.Layer):
  """A Feature Pyramid Network (FPN) layer.

  This implements the paper:
- Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He,
- and Serge Belongie.
+ Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He,
+ Bharath Hariharan, and Serge Belongie.
+ Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)

  Feature Pyramid Networks (FPNs) are basic components that are added to an

keras_hub/src/models/retinanet/prediction_head.py:

@@ -7,8 +7,8 @@ class PredictionHead(keras.layers.Layer):
  """A head for classification or bounding box regression predictions.

  Args:
- output_filters: int. The umber of convolution filters in the final
- The number of output channels determines the prediction type:
+ output_filters: int. The umber of convolution filters in the final
+ layer. The number of output channels determines the prediction type:
  - **Classification**:
  `output_filters = num_anchors * num_classes`
  Predicts class probabilities for each anchor.

keras_hub/src/models/retinanet/retinanet_backbone.py:

@@ -42,7 +42,8 @@ class RetinaNetBackbone(FeaturePyramidBackbone):

  Raises:
  ValueError: If `min_level` is greater than `max_level`.
- ValueError: If `backbone_max_level` is less than 5 and `max_level` is
+ ValueError: If `backbone_max_level` is less than 5 and `max_level` is
+ greater than or equal to 5.
  """

  def __init__(

@@ -57,7 +58,6 @@ class RetinaNetBackbone(FeaturePyramidBackbone):
  dtype=None,
  **kwargs,
  ):
-
  # === Layers ===
  if min_level > max_level:
  raise ValueError(
keras_hub/src/models/retinanet/retinanet_object_detector.py:

@@ -14,7 +14,7 @@ from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone
  from keras_hub.src.models.retinanet.retinanet_label_encoder import (
  RetinaNetLabelEncoder,
  )
- from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import (
+ from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import ( # noqa: E501
  RetinaNetObjectDetectorPreprocessor,
  )

@@ -54,10 +54,8 @@ class RetinaNetObjectDetector(ImageObjectDetector):
  ground truth boxes and classes into training targets. It matches
  ground truth boxes to anchors based on IoU and encodes box
  coordinates as offsets. If `None`, a default encoder is created.
- See the
-
- class for details. If None, a default encoder is created with
- standard parameters.
+ See the `RetinaNetLabelEncoder` class for details. If None, a
+ default encoder is created with standard parameters.
  - `anchor_generator`: Same as the model's.
  - `bounding_box_format`: Same as the model's
  `bounding_box_format`.

@@ -74,7 +72,8 @@ class RetinaNetObjectDetector(ImageObjectDetector):
  pre_logits_num_conv_layers: int. The number of convolutional layers in
  the head before the logits layer. These convolutional layers are
  applied before the final linear layer (logits) that produces the
- output predictions (bounding box regressions,
+ output predictions (bounding box regressions,
+ classification scores).
  preprocessor: Optional. An instance of
  `RetinaNetObjectDetectorPreprocessor`or a custom preprocessor.
  Handles image preprocessing before feeding into the backbone.

keras_hub/src/models/retinanet/retinanet_presets.py:

@@ -5,7 +5,8 @@ backbone_presets = {
  "retinanet_resnet50_fpn_coco": {
  "metadata": {
  "description": (
- "RetinaNet model with ResNet50 backbone fine-tuned on COCO in
+ "RetinaNet model with ResNet50 backbone fine-tuned on COCO in "
+ "800x800 resolution."
  ),
  "params": 34121239,
  "path": "retinanet",
keras_hub/src/models/roberta/roberta_backbone.py:

@@ -23,8 +23,8 @@ class RobertaBackbone(Backbone):

  The default constructor gives a fully customizable, randomly initialized
  RoBERTa encoder with any number of layers, heads, and embedding
- dimensions. To load preset architectures and weights, use the
- constructor.
+ dimensions. To load preset architectures and weights, use the
+ `from_preset()` constructor.

  Disclaimer: Pre-trained models are provided on an "as is" basis, without
  warranties or conditions of any kind. The underlying model is provided by a

keras_hub/src/models/roberta/roberta_presets.py:

@@ -5,7 +5,8 @@ backbone_presets = {
  "metadata": {
  "description": (
  "12-layer RoBERTa model where case is maintained."
- "Trained on English Wikipedia, BooksCorpus, CommonCraw, and
+ "Trained on English Wikipedia, BooksCorpus, CommonCraw, and "
+ "OpenWebText."
  ),
  "params": 124052736,
  "path": "roberta",

@@ -16,7 +17,8 @@ backbone_presets = {
  "metadata": {
  "description": (
  "24-layer RoBERTa model where case is maintained."
- "Trained on English Wikipedia, BooksCorpus, CommonCraw, and
+ "Trained on English Wikipedia, BooksCorpus, CommonCraw, and "
+ "OpenWebText."
  ),
  "params": 354307072,
  "path": "roberta",
keras_hub/src/models/roberta/roberta_text_classifier.py:

@@ -38,9 +38,9 @@ class RobertaTextClassifier(TextClassifier):
  Args:
  backbone: A `keras_hub.models.RobertaBackbone` instance.
  num_classes: int. Number of classes to predict.
- preprocessor: A `keras_hub.models.RobertaTextClassifierPreprocessor` or
- `None`, this model will not apply preprocessing, and
- be preprocessed before calling the model.
+ preprocessor: A `keras_hub.models.RobertaTextClassifierPreprocessor` or
+ `None`. If `None`, this model will not apply preprocessing, and
+ inputs should be preprocessed before calling the model.
  activation: Optional `str` or callable. The activation function to use
  on the model outputs. Set `activation="softmax"` to return output
  probabilities. Defaults to `None`.
keras_hub/src/models/sam/sam_backbone.py:

@@ -9,8 +9,8 @@ class SAMBackbone(Backbone):
  """A backbone for the Segment Anything Model (SAM).

  Args:
- image_encoder: `keras_hub.models.ViTDetBackbone`. A feature extractor
- the input images.
+ image_encoder: `keras_hub.models.ViTDetBackbone`. A feature extractor
+ for the input images.
  prompt_encoder: `keras_hub.layers.SAMPromptEncoder`. A Keras layer to
  compute embeddings for points, box, and mask prompt.
  mask_decoder: `keras_hub.layers.SAMMaskDecoder`. A Keras layer to

keras_hub/src/models/sam/sam_image_segmenter.py:

@@ -200,17 +200,18 @@ class SAMImageSegmenter(ImageSegmenter):
  def _add_placeholder_prompts(self, inputs):
  """Adds placeholder prompt inputs for a call to SAM.

- Because SAM is a functional subclass model, all inputs must be specified
- calls to the model. However, prompt inputs are all optional, so we
- add placeholders when they're not specified by the user.
+ Because SAM is a functional subclass model, all inputs must be specified
+ in calls to the model. However, prompt inputs are all optional, so we
+ have to add placeholders when they're not specified by the user.
  """
  inputs = inputs.copy()

  # Get the batch shape based on the image input
  batch_size = ops.shape(inputs["images"])[0]

- # The type of the placeholders must match the existing inputs with
- # to whether or not they are tensors (as opposed to Numpy
+ # The type of the placeholders must match the existing inputs with
+ # respect to whether or not they are tensors (as opposed to Numpy
+ # arrays).
  zeros = ops.zeros if ops.is_tensor(inputs["images"]) else np.zeros

  # Fill in missing inputs.
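The docstring above explains why `_add_placeholder_prompts` exists: prompt inputs are optional and are filled with placeholders when missing. A sketch relying on that behavior (the preset name is illustrative, and the image-only call is an assumption based on the docstring, not something shown in this diff):

```python
# Sketch only: the preset name and the image-only call are assumptions.
import numpy as np
import keras_hub

sam = keras_hub.models.SAMImageSegmenter.from_preset("sam_base_sa1b")
images = np.random.uniform(size=(1, 1024, 1024, 3)).astype("float32")

# Point/box/mask prompts are optional; placeholders are added internally.
outputs = sam.predict({"images": images})  # dict of masks and IoU scores
```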
keras_hub/src/models/sam/sam_layers.py:

@@ -170,8 +170,8 @@ class TwoWayMultiHeadAttention(keras.layers.Layer):
  key_dim: int. Size of each attention head for query, key, and
  value.
  intermediate_dim: int. Number of hidden dims to use in the mlp block.
- skip_first_layer_pos_embedding: bool. A boolean indicating whether to
- first layer positional embeddings.
+ skip_first_layer_pos_embedding: bool. A boolean indicating whether to
+ skip the first layer positional embeddings.
  attention_downsample_rate: int, optional. The downsample rate to use
  in the attention layers. Defaults to 2.
  activation: str, optional. The activation for the mlp block's output

@@ -296,7 +296,9 @@ class TwoWayMultiHeadAttention(keras.layers.Layer):
  "num_heads": self.num_heads,
  "key_dim": self.key_dim,
  "intermediate_dim": self.intermediate_dim,
- "skip_first_layer_pos_embedding":
+ "skip_first_layer_pos_embedding": (
+ self.skip_first_layer_pos_embedding
+ ),
  "attention_downsample_rate": self.attention_downsample_rate,
  "activation": self.activation,
  }

keras_hub/src/models/sam/sam_prompt_encoder.py:

@@ -57,7 +57,7 @@ class SAMPromptEncoder(keras.layers.Layer):
  input_image_size=(1024, 1024),
  mask_in_channels=16,
  activation="gelu",
- **kwargs
+ **kwargs,
  ):
  super().__init__(**kwargs)
  self.hidden_size = hidden_size

@@ -305,7 +305,9 @@ class SAMPromptEncoder(keras.layers.Layer):
  return {
  "prompt_sparse_embeddings": sparse_embeddings,
  "prompt_dense_embeddings": dense_embeddings,
- "prompt_dense_positional_embeddings":
+ "prompt_dense_positional_embeddings": (
+ prompt_dense_positional_embeddings
+ ),
  }

  def get_config(self):