keras-hub-nightly 0.19.0.dev202412120352__py3-none-any.whl → 0.19.0.dev202412140350__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- keras_hub/api/layers/__init__.py +1 -0
- keras_hub/api/models/__init__.py +11 -6
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/converters.py +2 -2
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/rms_normalization.py +8 -6
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +3 -1
- keras_hub/src/metrics/bleu.py +1 -1
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/bert/bert_presets.py +4 -2
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/causal_lm.py +19 -15
- keras_hub/src/models/clip/clip_vision_embedding.py +1 -1
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +17 -13
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +4 -3
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +1 -1
- keras_hub/src/models/densenet/densenet_backbone.py +3 -1
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +6 -6
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +2 -1
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/cba.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +20 -8
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +1 -1
- keras_hub/src/models/efficientnet/efficientnet_presets.py +12 -11
- keras_hub/src/models/efficientnet/fusedmbconv.py +3 -5
- keras_hub/src/models/efficientnet/mbconv.py +1 -1
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/flux/flux_layers.py +46 -44
- keras_hub/src/models/flux/flux_maths.py +24 -17
- keras_hub/src/models/flux/flux_model.py +24 -19
- keras_hub/src/models/flux/flux_presets.py +2 -1
- keras_hub/src/models/flux/flux_text_to_image.py +7 -3
- keras_hub/src/models/gemma/gemma_backbone.py +27 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +9 -3
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier_preprocessor.py +4 -1
- keras_hub/src/models/image_object_detector.py +2 -2
- keras_hub/src/models/image_object_detector_preprocessor.py +4 -4
- keras_hub/src/models/image_segmenter_preprocessor.py +2 -2
- keras_hub/src/models/llama/llama_backbone.py +34 -26
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +3 -3
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/mit_backbone.py +4 -3
- keras_hub/src/models/mit/mit_layers.py +2 -1
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +7 -7
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +5 -3
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +2 -2
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +2 -2
- keras_hub/src/models/retinanet/feature_pyramid.py +3 -2
- keras_hub/src/models/retinanet/prediction_head.py +2 -2
- keras_hub/src/models/retinanet/retinanet_backbone.py +2 -2
- keras_hub/src/models/retinanet/retinanet_image_converter.py +1 -1
- keras_hub/src/models/retinanet/retinanet_object_detector.py +5 -6
- keras_hub/src/models/retinanet/retinanet_presets.py +2 -1
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +4 -2
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/sam_backbone.py +2 -2
- keras_hub/src/models/sam/sam_image_segmenter.py +6 -5
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/segformer_backbone.py +18 -14
- keras_hub/src/models/segformer/segformer_image_segmenter.py +51 -38
- keras_hub/src/models/segformer/segformer_presets.py +24 -12
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +20 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +1 -1
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +13 -6
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +2 -2
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +7 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/task.py +4 -2
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +5 -1
- keras_hub/src/models/vae/vae_layers.py +0 -1
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +49 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +4 -2
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -3
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +2 -2
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +2 -2
- keras_hub/src/tokenizers/byte_tokenizer.py +2 -8
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +7 -12
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +8 -5
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +7 -3
- keras_hub/src/utils/preset_utils.py +25 -18
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +2 -4
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/RECORD +148 -140
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.19.0.dev202412120352.dist-info → keras_hub_nightly-0.19.0.dev202412140350.dist-info}/top_level.txt +0 -0
keras_hub/src/models/densenet/densenet_backbone.py

@@ -29,7 +29,9 @@ class DenseNetBackbone(FeaturePyramidBackbone):
     input_data = np.ones(shape=(8, 224, 224, 3))
 
     # Pretrained backbone
-    model = keras_hub.models.DenseNetBackbone.from_preset(
+    model = keras_hub.models.DenseNetBackbone.from_preset(
+        "densenet_121_imagenet"
+    )
     model(input_data)
 
     # Randomly initialized backbone with a custom config
keras_hub/src/models/densenet/densenet_image_classifier.py

@@ -1,6 +1,6 @@
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.densenet.densenet_backbone import DenseNetBackbone
-from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (
+from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import (  # noqa: E501
     DenseNetImageClassifierPreprocessor,
 )
 from keras_hub.src.models.image_classifier import ImageClassifier
keras_hub/src/models/densenet/densenet_presets.py

@@ -4,8 +4,8 @@ backbone_presets = {
     "densenet_121_imagenet": {
         "metadata": {
             "description": (
-                "121-layer DenseNet model pre-trained on the ImageNet 1k
-                "at a 224x224 resolution."
+                "121-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 7037504,
             "path": "densenet",
@@ -15,8 +15,8 @@ backbone_presets = {
     "densenet_169_imagenet": {
         "metadata": {
             "description": (
-                "169-layer DenseNet model pre-trained on the ImageNet 1k
-                "at a 224x224 resolution."
+                "169-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 12642880,
             "path": "densenet",
@@ -26,8 +26,8 @@ backbone_presets = {
     "densenet_201_imagenet": {
         "metadata": {
             "description": (
-                "201-layer DenseNet model pre-trained on the ImageNet 1k
-                "at a 224x224 resolution."
+                "201-layer DenseNet model pre-trained on the ImageNet 1k "
+                "dataset at a 224x224 resolution."
             ),
             "params": 18321984,
             "path": "densenet",
keras_hub/src/models/distil_bert/distil_bert_masked_lm.py

@@ -8,7 +8,7 @@ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     distilbert_kernel_initializer,
 )
-from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import (
+from keras_hub.src.models.distil_bert.distil_bert_masked_lm_preprocessor import (  # noqa: E501
     DistilBertMaskedLMPreprocessor,
 )
 from keras_hub.src.models.masked_lm import MaskedLM
keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py

@@ -17,8 +17,8 @@ class DistilBertMaskedLMPreprocessor(MaskedLMPreprocessor):
 
     This preprocessing layer will prepare inputs for a masked language modeling
     task. It is primarily intended for use with the
-    `keras_hub.models.DistilBertMaskedLM` task model. Preprocessing will occur
-    multiple steps.
+    `keras_hub.models.DistilBertMaskedLM` task model. Preprocessing will occur
+    in multiple steps.
 
     1. Tokenize any number of input segments using the `tokenizer`.
     2. Pack the inputs together using a `keras_hub.layers.MultiSegmentPacker`.
keras_hub/src/models/distil_bert/distil_bert_presets.py

@@ -28,7 +28,8 @@ backbone_presets = {
     "distil_bert_base_multi": {
         "metadata": {
             "description": (
-                "6-layer DistilBERT model where case is maintained. Trained on
+                "6-layer DistilBERT model where case is maintained. Trained on "
+                "Wikipedias of 104 languages"
             ),
             "params": 134734080,
             "path": "distil_bert",
keras_hub/src/models/distil_bert/distil_bert_text_classifier.py

@@ -7,7 +7,7 @@ from keras_hub.src.models.distil_bert.distil_bert_backbone import (
 from keras_hub.src.models.distil_bert.distil_bert_backbone import (
     distilbert_kernel_initializer,
 )
-from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import (
+from keras_hub.src.models.distil_bert.distil_bert_text_classifier_preprocessor import (  # noqa: E501
     DistilBertTextClassifierPreprocessor,
 )
 from keras_hub.src.models.text_classifier import TextClassifier
@@ -40,9 +40,9 @@ class DistilBertTextClassifier(TextClassifier):
     Args:
         backbone: A `keras_hub.models.DistilBert` instance.
         num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.DistilBertTextClassifierPreprocessor`
-            `None`, this model will not apply preprocessing, and
-            be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.DistilBertTextClassifierPreprocessor`
+            or `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
         activation: Optional `str` or callable. The
            activation function to use on the model outputs. Set
            `activation="softmax"` to return output probabilities.
@@ -128,7 +128,7 @@ class DistilBertTextClassifier(TextClassifier):
     )
     classifier.fit(x=features, y=labels, batch_size=2)
     ```
-    """
+    """  # noqa: E501
 
     backbone_cls = DistilBertBackbone
     preprocessor_cls = DistilBertTextClassifierPreprocessor
keras_hub/src/models/distil_bert/distil_bert_tokenizer.py

@@ -16,9 +16,9 @@ class DistilBertTokenizer(WordPieceTokenizer):
 
     This tokenizer class will tokenize raw strings into integer sequences and
     is based on `keras_hub.tokenizers.WordPieceTokenizer`. Unlike the
-    underlying tokenizer, it will check for all special tokens needed by
-    models and provides a `from_preset()` method to automatically
-    a matching vocabulary for a DistilBERT preset.
+    underlying tokenizer, it will check for all special tokens needed by
+    DistilBERT models and provides a `from_preset()` method to automatically
+    download a matching vocabulary for a DistilBERT preset.
 
     If input is a batch of strings (rank > 0), the layer will output a
     `tf.RaggedTensor` where the last dimension of the output is ragged.
keras_hub/src/models/efficientnet/efficientnet_backbone.py

@@ -361,8 +361,12 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         config = super().get_config()
         config.update(
             {
-                "stackwise_width_coefficients":
-
+                "stackwise_width_coefficients": (
+                    self.stackwise_width_coefficients
+                ),
+                "stackwise_depth_coefficients": (
+                    self.stackwise_depth_coefficients
+                ),
                 "dropout": self.dropout,
                 "depth_divisor": self.depth_divisor,
                 "min_depth": self.min_depth,
@@ -373,12 +377,18 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
                 "stackwise_input_filters": self.stackwise_input_filters,
                 "stackwise_output_filters": self.stackwise_output_filters,
                 "stackwise_expansion_ratios": self.stackwise_expansion_ratios,
-                "stackwise_squeeze_and_excite_ratios":
+                "stackwise_squeeze_and_excite_ratios": (
+                    self.stackwise_squeeze_and_excite_ratios
+                ),
                 "stackwise_strides": self.stackwise_strides,
                 "stackwise_block_types": self.stackwise_block_types,
-                "stackwise_force_input_filters":
+                "stackwise_force_input_filters": (
+                    self.stackwise_force_input_filters
+                ),
                 "include_stem_padding": self.include_stem_padding,
-                "use_depth_divisor_as_min_depth":
+                "use_depth_divisor_as_min_depth": (
+                    self.use_depth_divisor_as_min_depth
+                ),
                 "cap_round_filter_decrease": self.cap_round_filter_decrease,
                 "stem_conv_padding": self.stem_conv_padding,
                 "batch_norm_momentum": self.batch_norm_momentum,
@@ -389,7 +399,7 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         return config
 
     def _correct_pad_downsample(self, inputs, kernel_size):
-        """Returns a tuple for zero-padding
+        """Returns a tuple for zero-padding a 2D convolution with downsampling.
 
         Args:
             inputs: Input tensor.
@@ -436,9 +446,11 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
             filters_out: integer, the number of output filters.
             kernel_size: integer, the dimension of the convolution window.
             strides: integer, the stride of the convolution.
-            activation: activation function to use between each convolutional
+            activation: activation function to use between each convolutional
+                layer.
             expand_ratio: integer, scaling coefficient for the input filters.
-            se_ratio: float between 0 and 1, fraction to squeeze the input
+            se_ratio: float between 0 and 1, fraction to squeeze the input
+                filters.
             dropout: float between 0 and 1, fraction of the input units to drop.
             name: string, block label.
 
keras_hub/src/models/efficientnet/efficientnet_image_classifier.py

@@ -2,7 +2,7 @@ from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.models.efficientnet.efficientnet_backbone import (
     EfficientNetBackbone,
 )
-from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import (
+from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import (  # noqa: E501
     EfficientNetImageClassifierPreprocessor,
 )
 from keras_hub.src.models.image_classifier import ImageClassifier
keras_hub/src/models/efficientnet/efficientnet_presets.py

@@ -15,10 +15,10 @@ backbone_presets = {
     "efficientnet_b0_ra4_e3600_r224_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet B0 model pre-trained on the ImageNet 1k dataset
-                " Ross Wightman. Trained with timm scripts using
-                " inspired by the MobileNet-V4 small, mixed
-
+                "EfficientNet B0 model pre-trained on the ImageNet 1k dataset "
+                "by Ross Wightman. Trained with timm scripts using "
+                "hyper-parameters inspired by the MobileNet-V4 small, mixed "
+                "with go-to hparams from timm and 'ResNet Strikes Back'."
             ),
             "params": 5288548,
             "path": "efficientnet",
@@ -38,10 +38,10 @@ backbone_presets = {
     "efficientnet_b1_ra4_e3600_r240_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet B1 model pre-trained on the ImageNet 1k dataset
-                " Ross Wightman. Trained with timm scripts using
-                " inspired by the MobileNet-V4 small, mixed
-
+                "EfficientNet B1 model pre-trained on the ImageNet 1k dataset "
+                "by Ross Wightman. Trained with timm scripts using "
+                "hyper-parameters inspired by the MobileNet-V4 small, mixed "
+                "with go-to hparams from timm and 'ResNet Strikes Back'."
             ),
             "params": 7794184,
             "path": "efficientnet",
@@ -86,7 +86,8 @@ backbone_presets = {
             "description": (
                 "EfficientNet B5 model pre-trained on the ImageNet 12k dataset "
                 "by Ross Wightman. Based on Swin Transformer train / pretrain "
-                "recipe with modifications (related to both DeiT and ConvNeXt
+                "recipe with modifications (related to both DeiT and ConvNeXt "
+                "recipes)."
             ),
             "params": 30389784,
             "path": "efficientnet",
@@ -181,8 +182,8 @@ backbone_presets = {
     "efficientnet_lite0_ra_imagenet": {
         "metadata": {
             "description": (
-                "EfficientNet-Lite model fine-trained on the ImageNet 1k
-                "with RandAugment recipe."
+                "EfficientNet-Lite model fine-trained on the ImageNet 1k "
+                "dataset with RandAugment recipe."
             ),
             "params": 4652008,
             "path": "efficientnet",
keras_hub/src/models/efficientnet/fusedmbconv.py

@@ -7,10 +7,8 @@ class FusedMBConvBlock(keras.layers.Layer):
     """Implementation of the FusedMBConv block
 
     Also known as a Fused Mobile Inverted Residual Bottleneck block from:
-
-
-    [EfficientNetV2: Smaller Models and Faster Training]
-    (https://arxiv.org/abs/2104.00298v3).
+    [EfficientNet-EdgeTPU](https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
+    [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298v3).
 
     FusedMBConv blocks are based on MBConv blocks, and replace the depthwise and
     1x1 output convolution blocks with a single 3x3 convolution block, fusing
@@ -78,7 +76,7 @@ class FusedMBConvBlock(keras.layers.Layer):
         dropout=0.2,
         nores=False,
         projection_kernel_size=1,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.input_filters = input_filters
keras_hub/src/models/electra/electra_backbone.py

@@ -186,8 +186,8 @@ class ElectraBackbone(Backbone):
         # Index of classification token in the vocabulary
         cls_token_index = 0
         sequence_output = x
-        # Construct the two ELECTRA outputs. The pooled output is a dense layer
-        # top of the [CLS] token.
+        # Construct the two ELECTRA outputs. The pooled output is a dense layer
+        # on top of the [CLS] token.
         pooled_output = self.pooled_dense(x[:, cls_token_index, :])
         super().__init__(
             inputs={
keras_hub/src/models/f_net/f_net_text_classifier.py

@@ -34,9 +34,9 @@ class FNetTextClassifier(TextClassifier):
     Args:
         backbone: A `keras_hub.models.FNetBackbone` instance.
         num_classes: int. Number of classes to predict.
-        preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or
-            `None`, this model will not apply preprocessing, and
-            be preprocessed before calling the model.
+        preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
         activation: Optional `str` or callable. The
            activation function to use on the model outputs. Set
            `activation="softmax"` to return output probabilities.
keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py

@@ -22,9 +22,9 @@ class FNetTextClassifierPreprocessor(TextClassifierPreprocessor):
 
     1. Tokenize any number of input segments using the `tokenizer`.
     2. Pack the inputs together using a `keras_hub.layers.MultiSegmentPacker`.
-
-    3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"`
-
+       with the appropriate `"[CLS]"`, `"[SEP]"` and `"<pad>"` tokens.
+    3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"`
+       that can be passed directly to `keras_hub.models.FNetBackbone`.
 
     This layer can be used directly with `tf.data.Dataset.map` to preprocess
     string data in the `(x, y, sample_weight)` format used by
keras_hub/src/models/falcon/falcon_backbone.py

@@ -20,15 +20,17 @@ class FalconBackbone(Backbone):
     Args:
         vocabulary_size: int. The size of the token vocabulary.
         num_layers: int. The number of transformer layers.
-        num_attention_heads: int. The number of attention heads for each
-            The hidden size must be divisible by the number of
+        num_attention_heads: int. The number of attention heads for each
+            transformer. The hidden size must be divisible by the number of
+            attention heads.
         hidden_dim: int. The dimensionality of the embeddings and hidden states.
         intermediate_dim: int. The output dimension of the first Dense layer in
             the MLP network of each transformer.
         layer_norm_epsilon: float. Epsilon for the layer normalization layers in
             the transformer decoder.
         attention_dropout_rate: float. Dropout probability for the attention.
-        feedforward_dropout_rate: flaot. Dropout probability for the
+        feedforward_dropout_rate: flaot. Dropout probability for the
+            feedforward.
         dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
             for model computations and weights. Note that some computations,
             such as softmax and layer normalization, will always be done at
keras_hub/src/models/falcon/falcon_causal_lm.py

@@ -40,7 +40,9 @@ class FalconCausalLM(CausalLM):
 
     Use `generate()` to do text generation.
     ```python
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.generate("I want to say", max_length=30)
 
     # Generate with batched prompts.
@@ -49,7 +51,9 @@ class FalconCausalLM(CausalLM):
 
     Compile the `generate()` function with a custom sampler.
     ```python
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.compile(sampler="top_k")
     falcon_lm.generate("I want to say", max_length=30)
 
@@ -60,7 +64,8 @@ class FalconCausalLM(CausalLM):
     Use `generate()` without preprocessing.
     ```python
     prompt = {
-        # Token ids for
+        # Token ids for
+        # "<|endoftext|> Keras is".
         "token_ids": np.array([[50256, 17337, 292, 318]] * 2),
         # Use `"padding_mask"` to indicate values that should not be overridden.
         "padding_mask": np.array([[1, 1, 1, 1]] * 2),
@@ -76,15 +81,20 @@ class FalconCausalLM(CausalLM):
     Call `fit()` on a single batch.
     ```python
     features = ["The quick brown fox jumped.", "I forgot my homework."]
-    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+    falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     falcon_lm.fit(x=features, batch_size=2)
     ```
 
     Call `fit()` without preprocessing.
     ```python
     x = {
-        # Token ids for
-        "
+        # Token ids for
+        # "<|endoftext|> Keras is deep learning library<|endoftext|>"
+        "token_ids": np.array(
+            [[50256, 17337, 292, 318, 2769,4673,5888, 50256, 0]] * 2
+        ),
         "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 0]] * 2),
     }
     y = np.array([[17337, 292, 318, 2769, 4673, 5888, 50256, 0, 0]] * 2)
@@ -164,8 +174,8 @@ class FalconCausalLM(CausalLM):
         Args:
             token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
             cache: a dense float Tensor, the cache of key and value.
-            cache_update_index: int, or int Tensor. The index of current inputs
-                whole sequence.
+            cache_update_index: int, or int Tensor. The index of current inputs
+                in the whole sequence.
 
         Returns:
             A (logits, hidden_states, cache) tuple. Where `logits` is the
keras_hub/src/models/falcon/falcon_tokenizer.py

@@ -36,7 +36,9 @@ class FalconTokenizer(BytePairTokenizer):
 
     ```python
     # Unbatched input.
-    tokenizer = keras_hub.models.FalconTokenizer.from_preset(
+    tokenizer = keras_hub.models.FalconTokenizer.from_preset(
+        "falcon_refinedweb_1b_en"
+    )
     tokenizer("The quick brown fox jumped.")
 
     # Batched input.
@@ -49,7 +51,10 @@ class FalconTokenizer(BytePairTokenizer):
     vocab = {"<|endoftext|>": 0, "a": 4, "Ġquick": 5, "Ġfox": 6}
     merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
     merges += ["Ġ f", "o x", "Ġf ox"]
-    tokenizer = keras_hub.models.FalconTokenizer(
+    tokenizer = keras_hub.models.FalconTokenizer(
+        vocabulary=vocab,
+        merges=merges,
+    )
     tokenizer("a quick fox.")
     ```
     """