keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. keras_hub/api/layers/__init__.py +1 -0
  2. keras_hub/api/models/__init__.py +11 -6
  3. keras_hub/api/tokenizers/__init__.py +1 -1
  4. keras_hub/src/bounding_box/converters.py +2 -2
  5. keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
  6. keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
  7. keras_hub/src/layers/modeling/rms_normalization.py +8 -6
  8. keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
  9. keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
  10. keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
  11. keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
  12. keras_hub/src/metrics/bleu.py +1 -1
  13. keras_hub/src/models/albert/albert_text_classifier.py +7 -7
  14. keras_hub/src/models/bart/bart_backbone.py +4 -4
  15. keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
  16. keras_hub/src/models/bert/bert_presets.py +4 -2
  17. keras_hub/src/models/bert/bert_text_classifier.py +3 -3
  18. keras_hub/src/models/causal_lm.py +19 -15
  19. keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
  20. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
  21. keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
  22. keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
  23. keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
  24. keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
  25. keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
  26. keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
  27. keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
  28. keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
  29. keras_hub/src/models/densenet/densenet_backbone.py +3 -1
  30. keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
  31. keras_hub/src/models/densenet/densenet_presets.py +6 -6
  32. keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
  33. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
  34. keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
  35. keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
  36. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
  37. keras_hub/src/models/efficientnet/cba.py +1 -1
  38. keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
  39. keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
  40. keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
  41. keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
  42. keras_hub/src/models/efficientnet/mbconv.py +1 -1
  43. keras_hub/src/models/electra/electra_backbone.py +2 -2
  44. keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
  45. keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
  46. keras_hub/src/models/falcon/falcon_backbone.py +5 -3
  47. keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
  48. keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
  49. keras_hub/src/models/flux/flux_layers.py +46 -44
  50. keras_hub/src/models/flux/flux_maths.py +24 -17
  51. keras_hub/src/models/flux/flux_model.py +24 -19
  52. keras_hub/src/models/flux/flux_presets.py +2 -1
  53. keras_hub/src/models/flux/flux_text_to_image.py +7 -3
  54. keras_hub/src/models/gemma/gemma_backbone.py +27 -20
  55. keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
  56. keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
  57. keras_hub/src/models/gemma/gemma_presets.py +9 -3
  58. keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
  59. keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
  60. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
  61. keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
  62. keras_hub/src/models/image_classifier_preprocessor.py +4 -1
  63. keras_hub/src/models/image_object_detector.py +2 -2
  64. keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
  65. keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
  66. keras_hub/src/models/llama/llama_backbone.py +34 -26
  67. keras_hub/src/models/llama3/llama3_backbone.py +12 -11
  68. keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
  69. keras_hub/src/models/mistral/mistral_backbone.py +16 -15
  70. keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
  71. keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
  72. keras_hub/src/models/mit/mit_backbone.py +4 -3
  73. keras_hub/src/models/mit/mit_layers.py +2 -1
  74. keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
  75. keras_hub/src/models/opt/opt_causal_lm.py +2 -2
  76. keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
  77. keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
  78. keras_hub/src/models/phi3/phi3_decoder.py +0 -1
  79. keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
  80. keras_hub/src/models/preprocessor.py +2 -2
  81. keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
  82. keras_hub/src/models/retinanet/prediction_head.py +2 -2
  83. keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
  84. keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
  85. keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
  86. keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
  87. keras_hub/src/models/roberta/roberta_backbone.py +2 -2
  88. keras_hub/src/models/roberta/roberta_presets.py +4 -2
  89. keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
  90. keras_hub/src/models/sam/sam_backbone.py +2 -2
  91. keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
  92. keras_hub/src/models/sam/sam_layers.py +5 -3
  93. keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
  94. keras_hub/src/models/sam/sam_transformer.py +5 -4
  95. keras_hub/src/models/segformer/segformer_backbone.py +18 -14
  96. keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
  97. keras_hub/src/models/segformer/segformer_presets.py +24 -12
  98. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
  99. keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
  100. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
  101. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
  102. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
  103. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
  104. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
  105. keras_hub/src/models/task.py +4 -2
  106. keras_hub/src/models/text_classifier.py +2 -2
  107. keras_hub/src/models/text_to_image.py +5 -1
  108. keras_hub/src/models/vae/vae_layers.py +0 -1
  109. keras_hub/src/models/vit/__init__.py +5 -0
  110. keras_hub/src/models/vit/vit_backbone.py +152 -0
  111. keras_hub/src/models/vit/vit_image_classifier.py +187 -0
  112. keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
  113. keras_hub/src/models/vit/vit_image_converter.py +73 -0
  114. keras_hub/src/models/vit/vit_layers.py +391 -0
  115. keras_hub/src/models/vit/vit_presets.py +49 -0
  116. keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
  117. keras_hub/src/models/vit_det/vit_layers.py +3 -3
  118. keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
  119. keras_hub/src/models/whisper/whisper_backbone.py +6 -5
  120. keras_hub/src/models/whisper/whisper_decoder.py +3 -5
  121. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
  122. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
  123. keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
  124. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
  125. keras_hub/src/models/xlnet/relative_attention.py +20 -19
  126. keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
  127. keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
  128. keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
  129. keras_hub/src/samplers/contrastive_sampler.py +2 -3
  130. keras_hub/src/samplers/sampler.py +2 -1
  131. keras_hub/src/tests/test_case.py +2 -2
  132. keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
  133. keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
  134. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
  135. keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
  136. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
  137. keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
  138. keras_hub/src/utils/preset_utils.py +25 -18
  139. keras_hub/src/utils/tensor_utils.py +4 -4
  140. keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
  141. keras_hub/src/utils/transformers/convert_vit.py +150 -0
  142. keras_hub/src/utils/transformers/preset_loader.py +23 -0
  143. keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
  144. keras_hub/src/version_utils.py +1 -1
  145. {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
  146. {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
  147. {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
  148. {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
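
The headline change in this diff is a new ViT (Vision Transformer) model family (files 109-115 above) plus a Hugging Face checkpoint converter for it (file 141). As a hedged sketch of what these files wire up: `ViTImageClassifier` and `from_preset()` follow the standard keras-hub task API visible throughout the hunks below, while the preset name in this example is hypothetical, standing in for whatever the new `vit_presets.py` actually registers.

```python
import numpy as np
import keras_hub

# Load the new ViT classification task from a preset. The preset name is a
# placeholder; real names are defined in the newly added
# keras_hub/src/models/vit/vit_presets.py.
classifier = keras_hub.models.ViTImageClassifier.from_preset(
    "vit_base_patch16_224_imagenet"
)

# A dummy batch of one 224x224 RGB image. With a preset-attached
# preprocessor, resizing and rescaling are handled by the bundled
# image converter (file 113).
images = np.ones(shape=(1, 224, 224, 3))
predictions = classifier.predict(images)
```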
@@ -29,7 +29,9 @@ class DenseNetBackbone(FeaturePyramidBackbone):
     input_data = np.ones(shape=(8, 224, 224, 3))

     # Pretrained backbone
-    model = keras_hub.models.DenseNetBackbone.from_preset("densenet_121_imagenet")
+    model = keras_hub.models.DenseNetBackbone.from_preset(
+        "densenet_121_imagenet"
+    )
     model(input_data)

     # Randomly initialized backbone with a custom config
@@ -1,6 +1,6 @@
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.densenet.densenet_backbone import DenseNetBackbone
-from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
+from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (  # noqa: E501
     DenseNetImageClassifierPreprocessor,
 )
 from keras_hub.src.models.image_classifier import ImageClassifier
@@ -4,8 +4,8 @@ backbone_presets = {
     "densenet_121_imagenet": {
         "metadata": {
             "description": (
-                "121-layer DenseNet model pre-trained on the ImageNet 1k dataset "
-                "at a 224x224 resolution."
+                "121-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 7037504,
             "path": "densenet",
@@ -15,8 +15,8 @@ backbone_presets = {
     "densenet_169_imagenet": {
         "metadata": {
             "description": (
-                "169-layer DenseNet model pre-trained on the ImageNet 1k dataset "
-                "at a 224x224 resolution."
+                "169-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 12642880,
             "path": "densenet",
@@ -26,8 +26,8 @@ backbone_presets = {
     "densenet_201_imagenet": {
         "metadata": {
             "description": (
-                "201-layer DenseNet model pre-trained on the ImageNet 1k dataset "
-                "at a 224x224 resolution."
+                "201-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 18321984,
             "path": "densenet",
@@ -8,7 +8,7 @@ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     distilbert_kernel_initializer,
 )
-from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import (
+from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import (  # noqa: E501
     DistilBertMaskedLMPreprocessor,
 )
 from keras_hub.src.models.masked_lm import MaskedLM
@@ -17,8 +17,8 @@ class DistilBertMaskedLMPreprocessor(MaskedLMPreprocessor):

     This preprocessing layer will prepare inputs for a masked language modeling
     task. It is primarily intended for use with the
-    `keras_hub.models.DistilBertMaskedLM` task model. Preprocessing will occur in
-    multiple steps.
+    `keras_hub.models.DistilBertMaskedLM` task model. Preprocessing will occur
+    in multiple steps.

     1. Tokenize any number of input segments using the `tokenizer`.
     2. Pack the inputs together using a `keras_hub.layers.MultiSegmentPacker`.
@@ -28,7 +28,8 @@ backbone_presets = {
     "distil_bert_base_multi": {
         "metadata": {
             "description": (
-                "6-layer DistilBERT model where case is maintained. Trained on Wikipedias of 104 languages"
+                "6-layer DistilBERT model where case is maintained. Trained on "
+                "Wikipedias of 104 languages"
             ),
             "params": 134734080,
             "path": "distil_bert",
@@ -7,7 +7,7 @@ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     distilbert_kernel_initializer,
 )
-from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import (
+from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import (  # noqa: E501
     DistilBertTextClassifierPreprocessor,
 )
 from keras_hub.src.models.text_classifier import TextClassifier
@@ -40,9 +40,9 @@ class DistilBertTextClassifier(TextClassifier):
     Args:
         backbone: A `keras_hub.models.DistilBert` instance.
         num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.DistilBertTextClassifierPreprocessor` or `None`. If
-            `None`, this model will not apply preprocessing, and inputs should
-            be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.DistilBertTextClassifierPreprocessor`
+            or `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
         activation: Optional `str` or callable. The
             activation function to use on the model outputs. Set
             `activation="softmax"` to return output probabilities.
@@ -128,7 +128,7 @@ class DistilBertTextClassifier(TextClassifier):
     )
     classifier.fit(x=features, y=labels, batch_size=2)
     ```
-    """
+    """  # noqa: E501

     backbone_cls = DistilBertBackbone
     preprocessor_cls = DistilBertTextClassifierPreprocessor
@@ -16,9 +16,9 @@ class DistilBertTokenizer(WordPieceTokenizer):

     This tokenizer class will tokenize raw strings into integer sequences and
     is based on `keras_hub.tokenizers.WordPieceTokenizer`. Unlike the
-    underlying tokenizer, it will check for all special tokens needed by DistilBERT
-    models and provides a `from_preset()` method to automatically download
-    a matching vocabulary for a DistilBERT preset.
+    underlying tokenizer, it will check for all special tokens needed by
+    DistilBERT models and provides a `from_preset()` method to automatically
+    download a matching vocabulary for a DistilBERT preset.

     If input is a batch of strings (rank > 0), the layer will output a
     `tf.RaggedTensor` where the last dimension of the output is ragged.
@@ -43,7 +43,7 @@ class CBABlock(keras.layers.Layer):
         activation="swish",
         dropout=0.2,
         nores=False,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.input_filters = input_filters
@@ -361,8 +361,12 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         config = super().get_config()
         config.update(
             {
-                "stackwise_width_coefficients": self.stackwise_width_coefficients,
-                "stackwise_depth_coefficients": self.stackwise_depth_coefficients,
+                "stackwise_width_coefficients": (
+                    self.stackwise_width_coefficients
+                ),
+                "stackwise_depth_coefficients": (
+                    self.stackwise_depth_coefficients
+                ),
                 "dropout": self.dropout,
                 "depth_divisor": self.depth_divisor,
                 "min_depth": self.min_depth,
@@ -373,12 +377,18 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
                 "stackwise_input_filters": self.stackwise_input_filters,
                 "stackwise_output_filters": self.stackwise_output_filters,
                 "stackwise_expansion_ratios": self.stackwise_expansion_ratios,
-                "stackwise_squeeze_and_excite_ratios": self.stackwise_squeeze_and_excite_ratios,
+                "stackwise_squeeze_and_excite_ratios": (
+                    self.stackwise_squeeze_and_excite_ratios
+                ),
                 "stackwise_strides": self.stackwise_strides,
                 "stackwise_block_types": self.stackwise_block_types,
-                "stackwise_force_input_filters": self.stackwise_force_input_filters,
+                "stackwise_force_input_filters": (
+                    self.stackwise_force_input_filters
+                ),
                 "include_stem_padding": self.include_stem_padding,
-                "use_depth_divisor_as_min_depth": self.use_depth_divisor_as_min_depth,
+                "use_depth_divisor_as_min_depth": (
+                    self.use_depth_divisor_as_min_depth
+                ),
                 "cap_round_filter_decrease": self.cap_round_filter_decrease,
                 "stem_conv_padding": self.stem_conv_padding,
                 "batch_norm_momentum": self.batch_norm_momentum,
@@ -389,7 +399,7 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         return config

     def _correct_pad_downsample(self, inputs, kernel_size):
-        """Returns a tuple for zero-padding for 2D convolution with downsampling.
+        """Returns a tuple for zero-padding a 2D convolution with downsampling.

         Args:
             inputs: Input tensor.
@@ -436,9 +446,11 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         filters_out: integer, the number of output filters.
         kernel_size: integer, the dimension of the convolution window.
         strides: integer, the stride of the convolution.
-        activation: activation function to use between each convolutional layer.
+        activation: activation function to use between each convolutional
+            layer.
         expand_ratio: integer, scaling coefficient for the input filters.
-        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
+        se_ratio: float between 0 and 1, fraction to squeeze the input
+            filters.
         dropout: float between 0 and 1, fraction of the input units to drop.
         name: string, block label.

@@ -2,7 +2,7 @@ from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.efficientnet.efficientnet_backbone import (
     EfficientNetBackbone,
 )
-from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import (
+from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import (  # noqa: E501
     EfficientNetImageClassifierPreprocessor,
 )
 from keras_hub.src.models.image_classifier import ImageClassifier
@@ -15,10 +15,10 @@ backbone_presets = {
     "efficientnet_b0_ra4_e3600_r224_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet B0 model pre-trained on the ImageNet 1k dataset by"
-                " Ross Wightman. Trained with timm scripts using hyper-parameters"
-                " inspired by the MobileNet-V4 small, mixed with go-to hparams "
-                'from timm and "ResNet Strikes Back".'
+                "EfficientNet B0 model pre-trained on the ImageNet 1k dataset "
+                "by Ross Wightman. Trained with timm scripts using "
+                "hyper-parameters inspired by the MobileNet-V4 small, mixed "
+                "with go-to hparams from timm and 'ResNet Strikes Back'."
             ),
             "params": 5288548,
             "path": "efficientnet",
@@ -38,10 +38,10 @@ backbone_presets = {
     "efficientnet_b1_ra4_e3600_r240_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet B1 model pre-trained on the ImageNet 1k dataset by"
-                " Ross Wightman. Trained with timm scripts using hyper-parameters"
-                " inspired by the MobileNet-V4 small, mixed with go-to hparams "
-                'from timm and "ResNet Strikes Back".'
+                "EfficientNet B1 model pre-trained on the ImageNet 1k dataset "
+                "by Ross Wightman. Trained with timm scripts using "
+                "hyper-parameters inspired by the MobileNet-V4 small, mixed "
+                "with go-to hparams from timm and 'ResNet Strikes Back'."
             ),
             "params": 7794184,
             "path": "efficientnet",
@@ -86,7 +86,8 @@ backbone_presets = {
             "description": (
                 "EfficientNet B5 model pre-trained on the ImageNet 12k dataset "
                 "by Ross Wightman. Based on Swin Transformer train / pretrain "
-                "recipe with modifications (related to both DeiT and ConvNeXt recipes)."
+                "recipe with modifications (related to both DeiT and ConvNeXt "
+                "recipes)."
             ),
             "params": 30389784,
             "path": "efficientnet",
@@ -181,8 +182,8 @@ backbone_presets = {
     "efficientnet_lite0_ra_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet-Lite model fine-trained on the ImageNet 1k dataset "
-                "with RandAugment recipe."
+                "EfficientNet-Lite model fine-trained on the ImageNet 1k "
+                "dataset with RandAugment recipe."
             ),
             "params": 4652008,
             "path": "efficientnet",
@@ -7,10 +7,8 @@ class FusedMBConvBlock(keras.layers.Layer):
     """Implementation of the FusedMBConv block

     Also known as a Fused Mobile Inverted Residual Bottleneck block from:
-    [EfficientNet-EdgeTPU: Creating Accelerator-Optimized Neural Networks with AutoML]
-    (https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
-    [EfficientNetV2: Smaller Models and Faster Training]
-    (https://arxiv.org/abs/2104.00298v3).
+    [EfficientNet-EdgeTPU](https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
+    [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298v3).

     FusedMBConv blocks are based on MBConv blocks, and replace the depthwise and
     1x1 output convolution blocks with a single 3x3 convolution block, fusing
@@ -78,7 +76,7 @@ class FusedMBConvBlock(keras.layers.Layer):
         dropout=0.2,
         nores=False,
         projection_kernel_size=1,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.input_filters = input_filters
@@ -18,7 +18,7 @@ class MBConvBlock(keras.layers.Layer):
         activation="swish",
         dropout=0.2,
         nores=False,
-        **kwargs
+        **kwargs,
     ):
         """Implementation of the MBConv block

@@ -186,8 +186,8 @@ class ElectraBackbone(Backbone):
         # Index of classification token in the vocabulary
         cls_token_index = 0
         sequence_output = x
-        # Construct the two ELECTRA outputs. The pooled output is a dense layer on
-        # top of the [CLS] token.
+        # Construct the two ELECTRA outputs. The pooled output is a dense layer
+        # on top of the [CLS] token.
         pooled_output = self.pooled_dense(x[:, cls_token_index, :])
         super().__init__(
             inputs={
@@ -34,9 +34,9 @@ class FNetTextClassifier(TextClassifier):
     Args:
         backbone: A `keras_hub.models.FNetBackbone` instance.
        num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or `None`. If
-            `None`, this model will not apply preprocessing, and inputs should
-            be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
         activation: Optional `str` or callable. The
             activation function to use on the model outputs. Set
             `activation="softmax"` to return output probabilities.
@@ -22,9 +22,9 @@ class FNetTextClassifierPreprocessor(TextClassifierPreprocessor):

     1. Tokenize any number of input segments using the `tokenizer`.
     2. Pack the inputs together using a `keras_hub.layers.MultiSegmentPacker`.
-    with the appropriate `"[CLS]"`, `"[SEP]"` and `"<pad>"` tokens.
-    3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"` that
-    can be passed directly to `keras_hub.models.FNetBackbone`.
+       with the appropriate `"[CLS]"`, `"[SEP]"` and `"<pad>"` tokens.
+    3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"`
+       that can be passed directly to `keras_hub.models.FNetBackbone`.

     This layer can be used directly with `tf.data.Dataset.map` to preprocess
     string data in the `(x, y, sample_weight)` format used by
@@ -20,15 +20,17 @@ class FalconBackbone(Backbone):
     Args:
         vocabulary_size: int. The size of the token vocabulary.
         num_layers: int. The number of transformer layers.
-        num_attention_heads: int. The number of attention heads for each transformer.
-            The hidden size must be divisible by the number of attention heads.
+        num_attention_heads: int. The number of attention heads for each
+            transformer. The hidden size must be divisible by the number of
+            attention heads.
         hidden_dim: int. The dimensionality of the embeddings and hidden states.
         intermediate_dim: int. The output dimension of the first Dense layer in
             the MLP network of each transformer.
         layer_norm_epsilon: float. Epsilon for the layer normalization layers in
             the transformer decoder.
         attention_dropout_rate: float. Dropout probability for the attention.
-        feedforward_dropout_rate: flaot. Dropout probability for the feedforward.
+        feedforward_dropout_rate: flaot. Dropout probability for the
+            feedforward.
         dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
             for model computations and weights. Note that some computations,
             such as softmax and layer normalization, will always be done at
@@ -40,7 +40,9 @@ class FalconCausalLM(CausalLM):

     Use `generate()` to do text generation.
     ```python
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset("falcon_refinedweb_1b_en")
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.generate("I want to say", max_length=30)

     # Generate with batched prompts.
@@ -49,7 +51,9 @@

     Compile the `generate()` function with a custom sampler.
     ```python
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset("falcon_refinedweb_1b_en")
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.compile(sampler="top_k")
     falcon_lm.generate("I want to say", max_length=30)

@@ -60,7 +64,8 @@
     Use `generate()` without preprocessing.
     ```python
     prompt = {
-        # Token ids for "<|endoftext|> Keras is".
+        # Token ids for
+        # "<|endoftext|> Keras is".
         "token_ids": np.array([[50256, 17337, 292, 318]] * 2),
         # Use `"padding_mask"` to indicate values that should not be overridden.
         "padding_mask": np.array([[1, 1, 1, 1]] * 2),
@@ -76,15 +81,20 @@
     Call `fit()` on a single batch.
     ```python
     features = ["The quick brown fox jumped.", "I forgot my homework."]
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset("falcon_refinedweb_1b_en")
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.fit(x=features, batch_size=2)
     ```

     Call `fit()` without preprocessing.
     ```python
     x = {
-        # Token ids for "<|endoftext|> Keras is deep learning library<|endoftext|>"
-        "token_ids": np.array([[50256, 17337, 292, 318, 2769, 4673, 5888, 50256, 0]] * 2),
+        # Token ids for
+        # "<|endoftext|> Keras is deep learning library<|endoftext|>"
+        "token_ids": np.array(
+            [[50256, 17337, 292, 318, 2769,4673,5888, 50256, 0]] * 2
+        ),
         "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 0]] * 2),
     }
     y = np.array([[17337, 292, 318, 2769, 4673, 5888, 50256, 0, 0]] * 2)
@@ -164,8 +174,8 @@
         Args:
             token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
             cache: a dense float Tensor, the cache of key and value.
-            cache_update_index: int, or int Tensor. The index of current inputs in the
-                whole sequence.
+            cache_update_index: int, or int Tensor. The index of current inputs
+                in the whole sequence.

         Returns:
             A (logits, hidden_states, cache) tuple. Where `logits` is the
@@ -36,7 +36,9 @@ class FalconTokenizer(BytePairTokenizer):

     ```python
     # Unbatched input.
-    tokenizer = keras_hub.models.FalconTokenizer.from_preset("falcon_refinedweb_1b_en")
+    tokenizer = keras_hub.models.FalconTokenizer.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     tokenizer("The quick brown fox jumped.")

     # Batched input.
@@ -49,7 +51,10 @@
     vocab = {"<|endoftext|>": 0, "a": 4, "Ġquick": 5, "Ġfox": 6}
     merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     merges += ["Ġ f", "o x", "Ġf ox"]
-    tokenizer = keras_hub.models.FalconTokenizer(vocabulary=vocab, merges=merges)
+    tokenizer = keras_hub.models.FalconTokenizer(
+        vocabulary=vocab,
+        merges=merges,
+    )
     tokenizer("a quick fox.")
     ```
     """