keras-hub-nightly 0.19.0.dev202412110353__py3-none-any.whl → 0.19.0.dev202412130354__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. keras_hub/api/layers/__init__.py +1 -0
  2. keras_hub/api/models/__init__.py +11 -6
  3. keras_hub/api/tokenizers/__init__.py +1 -1
  4. keras_hub/src/bounding_box/converters.py +2 -2
  5. keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
  6. keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
  7. keras_hub/src/layers/modeling/rms_normalization.py +8 -6
  8. keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
  9. keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
  10. keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
  11. keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
  12. keras_hub/src/metrics/bleu.py +1 -1
  13. keras_hub/src/models/albert/albert_text_classifier.py +7 -7
  14. keras_hub/src/models/bart/bart_backbone.py +4 -4
  15. keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
  16. keras_hub/src/models/bert/bert_presets.py +4 -2
  17. keras_hub/src/models/bert/bert_text_classifier.py +3 -3
  18. keras_hub/src/models/causal_lm.py +19 -15
  19. keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
  20. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
  21. keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
  22. keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
  23. keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
  24. keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
  25. keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
  26. keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
  27. keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
  28. keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
  29. keras_hub/src/models/densenet/densenet_backbone.py +3 -1
  30. keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
  31. keras_hub/src/models/densenet/densenet_presets.py +6 -6
  32. keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
  33. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
  34. keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
  35. keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
  36. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
  37. keras_hub/src/models/efficientnet/cba.py +1 -1
  38. keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
  39. keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
  40. keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
  41. keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
  42. keras_hub/src/models/efficientnet/mbconv.py +1 -1
  43. keras_hub/src/models/electra/electra_backbone.py +2 -2
  44. keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
  45. keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
  46. keras_hub/src/models/falcon/falcon_backbone.py +5 -3
  47. keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
  48. keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
  49. keras_hub/src/models/flux/flux_layers.py +46 -44
  50. keras_hub/src/models/flux/flux_maths.py +24 -17
  51. keras_hub/src/models/flux/flux_model.py +24 -19
  52. keras_hub/src/models/flux/flux_presets.py +2 -1
  53. keras_hub/src/models/flux/flux_text_to_image.py +7 -3
  54. keras_hub/src/models/gemma/gemma_backbone.py +27 -20
  55. keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
  56. keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
  57. keras_hub/src/models/gemma/gemma_presets.py +9 -3
  58. keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
  59. keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
  60. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
  61. keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
  62. keras_hub/src/models/image_classifier_preprocessor.py +4 -1
  63. keras_hub/src/models/image_object_detector.py +2 -2
  64. keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
  65. keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
  66. keras_hub/src/models/llama/llama_backbone.py +34 -26
  67. keras_hub/src/models/llama3/llama3_backbone.py +12 -11
  68. keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
  69. keras_hub/src/models/mistral/mistral_backbone.py +16 -15
  70. keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
  71. keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
  72. keras_hub/src/models/mit/mit_backbone.py +4 -3
  73. keras_hub/src/models/mit/mit_layers.py +2 -1
  74. keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
  75. keras_hub/src/models/opt/opt_causal_lm.py +2 -2
  76. keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
  77. keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
  78. keras_hub/src/models/phi3/phi3_decoder.py +0 -1
  79. keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
  80. keras_hub/src/models/preprocessor.py +2 -2
  81. keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
  82. keras_hub/src/models/retinanet/prediction_head.py +2 -2
  83. keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
  84. keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
  85. keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
  86. keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
  87. keras_hub/src/models/roberta/roberta_backbone.py +2 -2
  88. keras_hub/src/models/roberta/roberta_presets.py +4 -2
  89. keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
  90. keras_hub/src/models/sam/sam_backbone.py +2 -2
  91. keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
  92. keras_hub/src/models/sam/sam_layers.py +5 -3
  93. keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
  94. keras_hub/src/models/sam/sam_transformer.py +5 -4
  95. keras_hub/src/models/segformer/segformer_backbone.py +18 -14
  96. keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
  97. keras_hub/src/models/segformer/segformer_presets.py +24 -12
  98. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
  99. keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
  100. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
  101. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
  102. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
  103. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
  104. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
  105. keras_hub/src/models/task.py +4 -2
  106. keras_hub/src/models/text_classifier.py +2 -2
  107. keras_hub/src/models/text_to_image.py +5 -1
  108. keras_hub/src/models/vae/vae_layers.py +0 -1
  109. keras_hub/src/models/vit/__init__.py +5 -0
  110. keras_hub/src/models/vit/vit_backbone.py +152 -0
  111. keras_hub/src/models/vit/vit_image_classifier.py +187 -0
  112. keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
  113. keras_hub/src/models/vit/vit_image_converter.py +73 -0
  114. keras_hub/src/models/vit/vit_layers.py +391 -0
  115. keras_hub/src/models/vit/vit_presets.py +49 -0
  116. keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
  117. keras_hub/src/models/vit_det/vit_layers.py +3 -3
  118. keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
  119. keras_hub/src/models/whisper/whisper_backbone.py +6 -5
  120. keras_hub/src/models/whisper/whisper_decoder.py +3 -5
  121. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
  122. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
  123. keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
  124. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
  125. keras_hub/src/models/xlnet/relative_attention.py +20 -19
  126. keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
  127. keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
  128. keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
  129. keras_hub/src/samplers/contrastive_sampler.py +2 -3
  130. keras_hub/src/samplers/sampler.py +2 -1
  131. keras_hub/src/tests/test_case.py +2 -2
  132. keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
  133. keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
  134. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
  135. keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
  136. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
  137. keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
  138. keras_hub/src/utils/preset_utils.py +25 -18
  139. keras_hub/src/utils/tensor_utils.py +4 -4
  140. keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
  141. keras_hub/src/utils/transformers/convert_vit.py +150 -0
  142. keras_hub/src/utils/transformers/preset_loader.py +23 -0
  143. keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
  144. keras_hub/src/version_utils.py +1 -1
  145. {keras_hub_nightly-0.19.0.dev202412110353.dist-info → keras_hub_nightly-0.19.0.dev202412130354.dist-info}/METADATA +1 -1
  146. {keras_hub_nightly-0.19.0.dev202412110353.dist-info → keras_hub_nightly-0.19.0.dev202412130354.dist-info}/RECORD +148 -140
  147. {keras_hub_nightly-0.19.0.dev202412110353.dist-info → keras_hub_nightly-0.19.0.dev202412130354.dist-info}/WHEEL +0 -0
  148. {keras_hub_nightly-0.19.0.dev202412110353.dist-info → keras_hub_nightly-0.19.0.dev202412130354.dist-info}/top_level.txt +0 -0
@@ -63,6 +63,7 @@ from keras_hub.src.models.segformer.segformer_image_converter import (
63
63
  SegFormerImageConverter,
64
64
  )
65
65
  from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter
66
+ from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter
66
67
  from keras_hub.src.models.whisper.whisper_audio_converter import (
67
68
  WhisperAudioConverter,
68
69
  )
@@ -200,18 +200,18 @@ from keras_hub.src.models.image_segmenter_preprocessor import (
200
200
  )
201
201
  from keras_hub.src.models.image_to_image import ImageToImage
202
202
  from keras_hub.src.models.inpaint import Inpaint
203
- from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
204
- from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
205
- from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
206
- Llama3CausalLMPreprocessor,
207
- )
208
- from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
209
203
  from keras_hub.src.models.llama.llama_backbone import LlamaBackbone
210
204
  from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM
211
205
  from keras_hub.src.models.llama.llama_causal_lm_preprocessor import (
212
206
  LlamaCausalLMPreprocessor,
213
207
  )
214
208
  from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
209
+ from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
210
+ from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
211
+ from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
212
+ Llama3CausalLMPreprocessor,
213
+ )
214
+ from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
215
215
  from keras_hub.src.models.masked_lm import MaskedLM
216
216
  from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
217
217
  from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
@@ -330,6 +330,11 @@ from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier
330
330
  from keras_hub.src.models.vgg.vgg_image_classifier_preprocessor import (
331
331
  VGGImageClassifierPreprocessor,
332
332
  )
333
+ from keras_hub.src.models.vit.vit_backbone import ViTBackbone
334
+ from keras_hub.src.models.vit.vit_image_classifier import ViTImageClassifier
335
+ from keras_hub.src.models.vit.vit_image_classifier_preprocessor import (
336
+ ViTImageClassifierPreprocessor,
337
+ )
333
338
  from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone
334
339
  from keras_hub.src.models.whisper.whisper_backbone import WhisperBackbone
335
340
  from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
@@ -21,8 +21,8 @@ from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer
21
21
  from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer
22
22
  from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
23
23
  from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
24
- from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
25
24
  from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
25
+ from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
26
26
  from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
27
27
  from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer
28
28
  from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
@@ -73,8 +73,8 @@ def encode_box_to_deltas(
73
73
 
74
74
  if encoding_format not in ["center_xywh", "center_yxhw"]:
75
75
  raise ValueError(
76
- "`encoding_format` should be one of 'center_xywh' or 'center_yxhw', "
77
- f"got {encoding_format}"
76
+ "`encoding_format` should be one of 'center_xywh' or "
77
+ f"'center_yxhw', got {encoding_format}"
78
78
  )
79
79
 
80
80
  encoded_anchors = convert_format(
@@ -66,7 +66,7 @@ class FNetEncoder(keras.layers.Layer):
66
66
  layer_norm_epsilon=1e-5,
67
67
  kernel_initializer="glorot_uniform",
68
68
  bias_initializer="zeros",
69
- **kwargs
69
+ **kwargs,
70
70
  ):
71
71
  super().__init__(**kwargs)
72
72
  self.intermediate_dim = intermediate_dim
@@ -34,7 +34,8 @@ class MaskedLMHead(keras.layers.Layer):
34
34
  token_embedding: Optional. A `keras_hub.layers.ReversibleEmbedding`
35
35
  instance. If passed, the layer will be used to project from the
36
36
  `hidden_dim` of the model to the output `vocabulary_size`.
37
- intermediate_activation: The activation function of intermediate dense layer.
37
+ intermediate_activation: The activation function of intermediate dense
38
+ layer.
38
39
  activation: The activation function for the outputs of the layer.
39
40
  Usually either `None` (return logits), or `"softmax"`
40
41
  (return probabilities).
@@ -6,10 +6,11 @@ from keras_hub.src.api_export import keras_hub_export
6
6
 
7
7
  @keras_hub_export("keras_hub.layers.RMSNormalization")
8
8
  class RMSNormalization(keras.layers.Layer):
9
- """
10
- Root Mean Square (RMS) Normalization layer.
9
+ """Root Mean Square (RMS) Normalization layer.
10
+
11
11
  This layer normalizes the input tensor based on its RMS value and applies
12
12
  a learned scaling factor.
13
+
13
14
  Args:
14
15
  input_dim: int. The dimensionality of the input tensor.
15
16
  """
@@ -21,12 +22,13 @@ class RMSNormalization(keras.layers.Layer):
21
22
  )
22
23
 
23
24
  def call(self, x):
24
- """
25
- Applies RMS normalization to the input tensor.
25
+ """Applies RMS normalization to the input tensor.
26
+
26
27
  Args:
27
- x: KerasTensor. Input tensor of shape (batch_size, input_dim).
28
+ x: Input tensor of shape (batch_size, input_dim).
29
+
28
30
  Returns:
29
- KerasTensor: The RMS-normalized tensor of the same shape (batch_size, input_dim),
31
+ The RMS-normalized tensor of the same shape (batch_size, input_dim),
30
32
  scaled by the learned `scale` parameter.
31
33
  """
32
34
  x = ops.cast(x, float)
@@ -11,7 +11,8 @@ class RotaryEmbedding(keras.layers.Layer):
11
11
  This layer encodes absolute positional information with a rotation
12
12
  matrix. It calculates the rotary encoding with a mix of sine and
13
13
  cosine functions with geometrically increasing wavelengths.
14
- Defined and formulated in [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864v4).
14
+ Defined and formulated in
15
+ [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864v4).
15
16
  The input must be a tensor with shape a sequence dimension and a feature
16
17
  dimension. Typically, this will either an input with shape
17
18
  `(batch_size, sequence_length, feature_length)` or
@@ -65,7 +66,7 @@ class RotaryEmbedding(keras.layers.Layer):
65
66
  scaling_factor=1.0,
66
67
  sequence_axis=1,
67
68
  feature_axis=-1,
68
- **kwargs
69
+ **kwargs,
69
70
  ):
70
71
  super().__init__(**kwargs)
71
72
  self.max_wavelength = max_wavelength
@@ -58,7 +58,7 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
58
58
  tie_weights=True,
59
59
  embeddings_initializer="uniform",
60
60
  mask_zero=False,
61
- **kwargs
61
+ **kwargs,
62
62
  ):
63
63
  super().__init__(**kwargs)
64
64
  if vocabulary_size is None:
@@ -5,12 +5,13 @@ from keras_hub.src.api_export import keras_hub_export
5
5
  from keras_hub.src.layers.modeling.cached_multi_head_attention import (
6
6
  CachedMultiHeadAttention,
7
7
  )
8
- from keras_hub.src.utils.keras_utils import clone_initializer
9
-
10
- from keras_hub.src.layers.modeling.transformer_layer_utils import ( # isort:skip
8
+ from keras_hub.src.layers.modeling.transformer_layer_utils import (
11
9
  compute_causal_mask,
10
+ )
11
+ from keras_hub.src.layers.modeling.transformer_layer_utils import (
12
12
  merge_padding_and_attention_mask,
13
13
  )
14
+ from keras_hub.src.utils.keras_utils import clone_initializer
14
15
 
15
16
 
16
17
  @keras_hub_export("keras_hub.layers.TransformerDecoder")
@@ -265,13 +266,13 @@ class TransformerDecoder(keras.layers.Layer):
265
266
  `[batch_size, decoder_sequence_length]`.
266
267
  decoder_attention_mask: a boolean Tensor. Customized decoder
267
268
  sequence mask, must be of shape
268
- `[batch_size, decoder_sequence_length, decoder_sequence_length]`.
269
+ `[batch_size, decoder_sequence_length, decoder_sequence_length]`
269
270
  encoder_padding_mask: a boolean Tensor, the padding mask of encoder
270
271
  sequence, must be of shape
271
272
  `[batch_size, encoder_sequence_length]`.
272
273
  encoder_attention_mask: a boolean Tensor. Customized encoder
273
274
  sequence mask, must be of shape
274
- `[batch_size, encoder_sequence_length, encoder_sequence_length]`.
275
+ `[batch_size, encoder_sequence_length, encoder_sequence_length]`
275
276
  self_attention_cache: a dense float Tensor. The cache of key/values
276
277
  pairs in the self-attention layer. Has shape
277
278
  `[batch_size, 2, max_seq_len, num_heads, key_dims]`.
@@ -435,7 +436,8 @@ class TransformerDecoder(keras.layers.Layer):
435
436
  input_length = output_length = ops.shape(decoder_sequence)[1]
436
437
  # We need to handle a rectangular causal mask when doing cached
437
438
  # decoding. For generative inference, `decoder_sequence` will
438
- # generally be length 1, and `cache` will be the full generation length.
439
+ # generally be length 1, and `cache` will be the full generation
440
+ # length.
439
441
  if self_attention_cache is not None:
440
442
  input_length = ops.shape(self_attention_cache)[2]
441
443
 
@@ -190,7 +190,9 @@ class TransformerEncoder(keras.layers.Layer):
190
190
  [batch_size, sequence_length, sequence_length].
191
191
  training: a boolean indicating whether the layer should behave in
192
192
  training mode or in inference mode.
193
- return_attention_scores: a boolean indicating whether the output should be `(attention_output, attention_scores)` if `True` or `attention_output` if `False`. Defaults to `False`.
193
+ return_attention_scores: a boolean indicating whether the output
194
+ should be `(attention_output, attention_scores)` if `True` or
195
+ `attention_output` if `False`. Defaults to `False`.
194
196
 
195
197
  Returns:
196
198
  A Tensor of the same shape as the `inputs`.
@@ -164,7 +164,7 @@ class Bleu(keras.metrics.Metric):
164
164
  return inputs
165
165
 
166
166
  def _get_ngrams(self, segment, max_order):
167
- """Extracts all n-grams up to a given maximum order from an input segment.
167
+ """Extracts all n-grams up to a given maximum order from an input.
168
168
 
169
169
  Uses Python ops. Inspired from
170
170
  https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py.
@@ -20,10 +20,10 @@ from keras_hub.src.models.text_classifier import TextClassifier
20
20
  class AlbertTextClassifier(TextClassifier):
21
21
  """An end-to-end ALBERT model for classification tasks
22
22
 
23
- This model attaches a classification head to a `keras_hub.model.AlbertBackbone`
24
- backbone, mapping from the backbone outputs to logit output suitable for
25
- a classification task. For usage of this model with pre-trained weights, see
26
- the `from_preset()` method.
23
+ This model attaches a classification head to a
24
+ `keras_hub.model.AlbertBackbone` backbone, mapping from the backbone outputs
25
+ to logit output suitable for a classification task. For usage of this model
26
+ with pre-trained weights, see the `from_preset()` method.
27
27
 
28
28
  This model can optionally be configured with a `preprocessor` layer, in
29
29
  which case it will automatically apply preprocessing to raw inputs during
@@ -36,9 +36,9 @@ class AlbertTextClassifier(TextClassifier):
36
36
  Args:
37
37
  backbone: A `keras_hub.models.AlertBackbone` instance.
38
38
  num_classes: int. Number of classes to predict.
39
- preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or `None`. If
40
- `None`, this model will not apply preprocessing, and inputs should
41
- be preprocessed before calling the model.
39
+ preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or
40
+ `None`. If `None`, this model will not apply preprocessing, and
41
+ inputs should be preprocessed before calling the model.
42
42
  activation: Optional `str` or callable. The
43
43
  activation function to use on the model outputs. Set
44
44
  `activation="softmax"` to return output probabilities.
@@ -22,9 +22,9 @@ class BartBackbone(Backbone):
22
22
  described in
23
23
  ["BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension"](https://arxiv.org/abs/1910.13461).
24
24
 
25
- The default constructor gives a fully customizable, randomly initialized BART
26
- model with any number of layers, heads, and embedding dimensions. To load
27
- preset architectures and weights, use the `from_preset` constructor.
25
+ The default constructor gives a fully customizable, randomly initialized
26
+ BART model with any number of layers, heads, and embedding dimensions. To
27
+ load preset architectures and weights, use the `from_preset` constructor.
28
28
 
29
29
  Disclaimer: Pre-trained models are provided on an "as is" basis, without
30
30
  warranties or conditions of any kind. The underlying model is provided by a
@@ -78,7 +78,7 @@ class BartBackbone(Backbone):
78
78
  )
79
79
  output = model(input_data)
80
80
  ```
81
- """
81
+ """ # noqa: E501
82
82
 
83
83
  def __init__(
84
84
  self,
@@ -60,7 +60,8 @@ class BartSeq2SeqLM(Seq2SeqLM):
60
60
  bart_lm.generate("The quick brown fox", max_length=30)
61
61
  ```
62
62
 
63
- Use `generate()` with encoder inputs and an incomplete decoder input (prompt).
63
+ Use `generate()` with encoder inputs and an incomplete decoder input
64
+ (prompt).
64
65
  ```python
65
66
  bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
66
67
  bart_lm.generate(
@@ -79,10 +80,10 @@ class BartSeq2SeqLM(Seq2SeqLM):
79
80
  prompt = {
80
81
  "encoder_token_ids": np.array([[0, 133, 2119, 6219, 23602, 2, 1, 1]]),
81
82
  "encoder_padding_mask": np.array(
82
- [[True, True, True, True, True, True, False, False]]
83
+ [[1, 1, 1, 1, 1, 1, 0, 0]]
83
84
  ),
84
85
  "decoder_token_ids": np.array([[2, 0, 133, 1769, 2, 1, 1]]),
85
- "decoder_padding_mask": np.array([[True, True, True, True, False, False]])
86
+ "decoder_padding_mask": np.array([[1, 1, 1, 1, 0, 0]])
86
87
  }
87
88
 
88
89
  bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset(
@@ -95,7 +96,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
95
96
  Call `fit()` on a single batch.
96
97
  ```python
97
98
  features = {
98
- "encoder_text": ["The quick brown fox jumped.", "I forgot my homework."],
99
+ "encoder_text": ["The quick fox jumped.", "I forgot my homework."],
99
100
  "decoder_text": ["The fast hazel fox leapt.", "I forgot my assignment."]
100
101
  }
101
102
  bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
@@ -195,7 +196,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
195
196
  cross_attention_cache=None,
196
197
  cross_attention_cache_update_index=None,
197
198
  ):
198
- """Forward pass with a key/value caches for generative decoding..
199
+ """Forward pass with a key/value caches for generative decoding.
199
200
 
200
201
  `call_decoder_with_cache` adds an additional inference-time forward pass
201
202
  for the model for seq2seq text generation. Unlike calling the model
@@ -241,7 +242,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
241
242
  key/value cache in the decoder's self-attention layer and
242
243
  `cross_attention_cache` is the key/value cache in the decoder's
243
244
  cross-attention layer.
244
- """
245
+ """ # noqa: E501
245
246
  # Embedding layers.
246
247
  tokens = self.backbone.token_embedding(decoder_token_ids)
247
248
  positions = self.backbone.decoder_position_embedding(
@@ -331,7 +332,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
331
332
  def _build_cache(
332
333
  self, encoder_token_ids, encoder_padding_mask, decoder_token_ids
333
334
  ):
334
- """Builds the self-attention cache and the cross-attention cache (key/value pairs)."""
335
+ """Builds the self-attention cache and the cross-attention cache."""
335
336
  encoder_hidden_states = self.call_encoder(
336
337
  token_ids=encoder_token_ids, padding_mask=encoder_padding_mask
337
338
  )
@@ -417,7 +418,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
417
418
  prompt = ops.slice(prompt, [0, cache_index], [num_samples, 1])
418
419
 
419
420
  def repeat_tensor(x):
420
- """Repeats tensors along batch axis to match dim for beam search."""
421
+ """Repeats along batch axis to match dim for beam search."""
421
422
  if ops.shape(x)[0] == num_samples:
422
423
  return x
423
424
  return ops.repeat(x, repeats=num_samples // batch_size, axis=0)
@@ -69,7 +69,8 @@ backbone_presets = {
69
69
  "bert_base_multi": {
70
70
  "metadata": {
71
71
  "description": (
72
- "12-layer BERT model where case is maintained. Trained on trained on Wikipedias of 104 languages"
72
+ "12-layer BERT model where case is maintained. Trained on "
73
+ "trained on Wikipedias of 104 languages"
73
74
  ),
74
75
  "params": 177853440,
75
76
  "path": "bert",
@@ -101,7 +102,8 @@ backbone_presets = {
101
102
  "bert_tiny_en_uncased_sst2": {
102
103
  "metadata": {
103
104
  "description": (
104
- "The bert_tiny_en_uncased backbone model fine-tuned on the SST-2 sentiment analysis dataset."
105
+ "The bert_tiny_en_uncased backbone model fine-tuned on the "
106
+ "SST-2 sentiment analysis dataset."
105
107
  ),
106
108
  "params": 4385920,
107
109
  "path": "bert",
@@ -34,9 +34,9 @@ class BertTextClassifier(TextClassifier):
34
34
  Args:
35
35
  backbone: A `keras_hub.models.BertBackbone` instance.
36
36
  num_classes: int. Number of classes to predict.
37
- preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or `None`. If
38
- `None`, this model will not apply preprocessing, and inputs should
39
- be preprocessed before calling the model.
37
+ preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or
38
+ `None`. If `None`, this model will not apply preprocessing, and
39
+ inputs should be preprocessed before calling the model.
40
40
  activation: Optional `str` or callable. The
41
41
  activation function to use on the model outputs. Set
42
42
  `activation="softmax"` to return output probabilities.
@@ -303,16 +303,18 @@ class CausalLM(Task):
303
303
  `preprocessor`. If `preprocessor` is `None`, `inputs` should be
304
304
  should be padded to the desired maximum length and this argument
305
305
  will be ignored.
306
- stop_token_ids: Optional. `None`, "auto", or tuple of token ids. Defaults
307
- to "auto" which uses the `preprocessor.tokenizer.end_token_id`.
308
- Not specifying a processor will produce an error. None stops
309
- generation after generating `max_length` tokens. You may also
310
- specify a list of token id's the model should stop on. Note that
311
- sequences of tokens will each be interpreted as a stop token,
312
- multi-token stop sequences are not supported.
313
- strip_prompt: Optional. By default, generate() returns the full prompt
314
- followed by its completion generated by the model. If this option
315
- is set to True, only the newly generated text is returned.
306
+ stop_token_ids: Optional. `None`, "auto", or tuple of token ids.
307
+ Defaults to "auto" which uses the
308
+ `preprocessor.tokenizer.end_token_id`. Not specifying a
309
+ processor will produce an error. None stops generation after
310
+ generating `max_length` tokens. You may also specify a list of
311
+ token id's the model should stop on. Note that sequences of
312
+ tokens will each be interpreted as a stop token, multi-token
313
+ stop sequences are not supported.
314
+ strip_prompt: Optional. By default, generate() returns the full
315
+ prompt followed by its completion generated by the model. If
316
+ this option is set to True, only the newly generated text is
317
+ returned.
316
318
  """
317
319
  # Setup our three main passes.
318
320
  # 1. Optionally preprocessing strings to dense integer tensors.
@@ -322,10 +324,11 @@ class CausalLM(Task):
322
324
 
323
325
  if self.preprocessor is None and stop_token_ids == "auto":
324
326
  raise ValueError(
325
- 'A `preprocessor` must be attached to the model if `stop_token_ids="auto"`. '
326
- "Currently `preprocessor=None`. To call `generate()` with preprocessing "
327
- "detached, either pass `stop_token_ids=None` to always generate until "
328
- "`max_length` or pass a tuple of token ids that should terminate generation "
327
+ "A `preprocessor` must be attached to the model if "
328
+ '`stop_token_ids="auto"`. Currently `preprocessor=None`. To '
329
+ "call `generate()` with preprocessing detached, either pass "
330
+ "`stop_token_ids=None` to always generate until `max_length` "
331
+ "or pass a tuple of token ids that should terminate generation "
329
332
  "as `stop_token_ids`."
330
333
  )
331
334
  elif stop_token_ids == "auto":
@@ -365,7 +368,8 @@ class CausalLM(Task):
365
368
  y["padding_mask"] = ops.logical_xor(
366
369
  roll_sequence(prompt_mask), roll_sequence(x["padding_mask"])
367
370
  )
368
- # we assume the mask is enough and there is no need to zero-out the values
371
+ # we assume the mask is enough and there is no need to zero-out the
372
+ # values
369
373
  y["token_ids"] = roll_sequence(x["token_ids"])
370
374
 
371
375
  return y
@@ -12,7 +12,7 @@ class CLIPVisionEmbedding(layers.Layer):
12
12
  image_size,
13
13
  data_format=None,
14
14
  dtype=None,
15
- **kwargs
15
+ **kwargs,
16
16
  ):
17
17
  super().__init__(dtype=dtype, **kwargs)
18
18
  self.hidden_dim = int(hidden_dim)
@@ -360,7 +360,8 @@ def apply_cross_stage_partial(
360
360
  """
361
361
 
362
362
  if name is None:
363
- name = f"cross_stage_partial_{keras.backend.get_uid('cross_stage_partial')}"
363
+ uid = keras.backend.get_uid("cross_stage_partial")
364
+ name = f"cross_stage_partial_{uid}"
364
365
 
365
366
  def apply(inputs):
366
367
  hidden_channels = filters // 2
@@ -83,7 +83,7 @@ class DebertaV3Backbone(Backbone):
83
83
  # Call the model on the input data.
84
84
  model(input_data)
85
85
  ```
86
- """
86
+ """ # noqa: E501
87
87
 
88
88
  def __init__(
89
89
  self,
@@ -7,7 +7,7 @@ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
7
7
  from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
8
8
  deberta_kernel_initializer,
9
9
  )
10
- from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import (
10
+ from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import ( # noqa: E501
11
11
  DebertaV3TextClassifierPreprocessor,
12
12
  )
13
13
  from keras_hub.src.models.text_classifier import TextClassifier
@@ -43,9 +43,9 @@ class DebertaV3TextClassifier(TextClassifier):
43
43
  Args:
44
44
  backbone: A `keras_hub.models.DebertaV3` instance.
45
45
  num_classes: int. Number of classes to predict.
46
- preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor` or `None`. If
47
- `None`, this model will not apply preprocessing, and inputs should
48
- be preprocessed before calling the model.
46
+ preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor`
47
+ or `None`. If `None`, this model will not apply preprocessing, and
48
+ inputs should be preprocessed before calling the model.
49
49
  activation: Optional `str` or callable. The
50
50
  activation function to use on the model outputs. Set
51
51
  `activation="softmax"` to return output probabilities.
@@ -45,7 +45,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
45
45
  bias_initializer: string or `keras.initializers` initializer.
46
46
  The bias initializer for the dense and disentangled
47
47
  self-attention layers. Defaults to `"zeros"`.
48
- """
48
+ """ # noqa: E501
49
49
 
50
50
  def __init__(
51
51
  self,
@@ -58,7 +58,7 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
58
58
  layer_norm_epsilon=1e-05,
59
59
  kernel_initializer="glorot_uniform",
60
60
  bias_initializer="zeros",
61
- **kwargs
61
+ **kwargs,
62
62
  ):
63
63
  super().__init__(**kwargs)
64
64
  self.intermediate_dim = intermediate_dim
@@ -145,8 +145,8 @@ class DisentangledAttentionEncoder(keras.layers.Layer):
145
145
  """Forward pass of `DisentangledAttentionEncoder`.
146
146
 
147
147
  Args:
148
- inputs: a Tensor. The input data to `DisentangledAttentionEncoder`, should be
149
- of shape [batch_size, sequence_length, hidden_dim].
148
+ inputs: a Tensor. The input data to `DisentangledAttentionEncoder`,
149
+ should be of shape [batch_size, sequence_length, hidden_dim].
150
150
  rel_embeddings: a Tensor. The relative position embedding matrix,
151
151
  should be of shape `[batch_size, 2 * bucket_size, hidden_dim]`.
152
152
  padding_mask: a boolean Tensor. It indicates if the token should be
@@ -31,7 +31,7 @@ class DisentangledSelfAttention(keras.layers.Layer):
31
31
  bias_initializer: string or `keras.initializers` initializer.
32
32
  The bias initializer for the dense layers.
33
33
  Defaults to `"zeros"`.
34
- """
34
+ """ # noqa: E501
35
35
 
36
36
  def __init__(
37
37
  self,
@@ -363,7 +363,8 @@ class DisentangledSelfAttention(keras.layers.Layer):
363
363
  training=training,
364
364
  )
365
365
 
366
- # Reshape `attention_output` to `(batch_size, sequence_length, hidden_dim)`.
366
+ # Reshape `attention_output` to
367
+ # `(batch_size, sequence_length, hidden_dim)`.
367
368
  attention_output = ops.reshape(
368
369
  attention_output,
369
370
  [
@@ -20,7 +20,7 @@ class RelativeEmbedding(keras.layers.Layer):
20
20
  kernel_initializer: string or `keras.initializers` initializer.
21
21
  The kernel initializer for the dense embedding.
22
22
  Defaults to `"glorot_uniform"`.
23
- """
23
+ """ # noqa: E501
24
24
 
25
25
  def __init__(
26
26
  self,
@@ -12,10 +12,10 @@ class DeepLabV3Backbone(Backbone):
12
12
  """DeepLabV3 & DeepLabV3Plus architecture for semantic segmentation.
13
13
 
14
14
  This class implements a DeepLabV3 & DeepLabV3Plus architecture as described
15
- in [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](
16
- https://arxiv.org/abs/1802.02611)(ECCV 2018)
17
- and [Rethinking Atrous Convolution for Semantic Image Segmentation](
18
- https://arxiv.org/abs/1706.05587)(CVPR 2017)
15
+ in [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)
16
+ (ECCV 2018)
17
+ and [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)
18
+ (CVPR 2017)
19
19
 
20
20
  Args:
21
21
  image_encoder: `keras.Model`. An instance that is used as a feature
@@ -35,20 +35,23 @@ class DeepLabV3Backbone(Backbone):
35
35
  layer resolution should match with the `low_level_feature`s layer
36
36
  resolution to concatenate both the layers for combined encoder
37
37
  outputs.
38
- dilation_rates: list. A `list` of integers for parallel dilated conv applied to
39
- `SpatialPyramidPooling`. Usually a
38
+ dilation_rates: list. A `list` of integers for parallel dilated conv
39
+ applied to `SpatialPyramidPooling`. Usually a
40
40
  sample choice of rates are `[6, 12, 18]`.
41
- low_level_feature_key: str optional. A layer level to extract the feature
42
- from one of the key from the `image_encoder`s `pyramid_outputs`
43
- property such as "P2", "P3" etc which will be the Decoder block.
44
- Required only when the DeepLabV3Plus architecture needs to be applied.
41
+ low_level_feature_key: str optional. A layer level to extract the
42
+ feature from one of the key from the `image_encoder`s
43
+ `pyramid_outputs` property such as "P2", "P3" etc which will be the
44
+ Decoder block. Required only when the DeepLabV3Plus architecture
45
+ needs to be applied.
45
46
  image_shape: tuple. The input shape without the batch size.
46
47
  Defaults to `(None, None, 3)`.
47
48
 
48
49
  Example:
49
50
  ```python
50
51
  # Load a trained backbone to extract features from it's `pyramid_outputs`.
51
- image_encoder = keras_hub.models.ResNetBackbone.from_preset("resnet_50_imagenet")
52
+ image_encoder = keras_hub.models.ResNetBackbone.from_preset(
53
+ "resnet_50_imagenet"
54
+ )
52
55
 
53
56
  model = keras_hub.models.DeepLabV3Backbone(
54
57
  image_encoder=image_encoder,
@@ -59,7 +62,7 @@ class DeepLabV3Backbone(Backbone):
59
62
  dilation_rates = [6, 12, 18]
60
63
  )
61
64
  ```
62
- """
65
+ """ # noqa: E501
63
66
 
64
67
  def __init__(
65
68
  self,
@@ -74,7 +77,8 @@ class DeepLabV3Backbone(Backbone):
74
77
  ):
75
78
  if not isinstance(image_encoder, keras.Model):
76
79
  raise ValueError(
77
- "Argument `image_encoder` must be a `keras.Model` instance. Received instead "
80
+ "Argument `image_encoder` must be a `keras.Model` instance. "
81
+ "Received instead "
78
82
  f"{image_encoder} (of type {type(image_encoder)})."
79
83
  )
80
84
  data_format = keras.config.image_data_format()
@@ -4,9 +4,10 @@ backbone_presets = {
4
4
  "deeplab_v3_plus_resnet50_pascalvoc": {
5
5
  "metadata": {
6
6
  "description": (
7
- "DeepLabV3+ model with ResNet50 as image encoder and trained on "
8
- "augmented Pascal VOC dataset by Semantic Boundaries Dataset(SBD)"
9
- "which is having categorical accuracy of 90.01 and 0.63 Mean IoU."
7
+ "DeepLabV3+ model with ResNet50 as image encoder and trained "
8
+ "on augmented Pascal VOC dataset by Semantic Boundaries "
9
+ "Dataset(SBD) which is having categorical accuracy of 90.01 "
10
+ "and 0.63 Mean IoU."
10
11
  ),
11
12
  "params": 39190656,
12
13
  "path": "deeplab_v3",
@@ -4,7 +4,7 @@ from keras_hub.src.api_export import keras_hub_export
4
4
  from keras_hub.src.models.deeplab_v3.deeplab_v3_backbone import (
5
5
  DeepLabV3Backbone,
6
6
  )
7
- from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import (
7
+ from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor import ( # noqa: E501
8
8
  DeepLabV3ImageSegmenterPreprocessor,
9
9
  )
10
10
  from keras_hub.src.models.image_segmenter import ImageSegmenter