keras-hub-nightly 0.16.1.dev202410020340__py3-none-any.whl → 0.19.0.dev202501260345__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +21 -3
- keras_hub/api/models/__init__.py +71 -12
- keras_hub/api/tokenizers/__init__.py +1 -1
- keras_hub/src/bounding_box/__init__.py +2 -0
- keras_hub/src/bounding_box/converters.py +102 -12
- keras_hub/src/layers/modeling/f_net_encoder.py +1 -1
- keras_hub/src/layers/modeling/masked_lm_head.py +2 -1
- keras_hub/src/layers/modeling/reversible_embedding.py +3 -16
- keras_hub/src/layers/modeling/rms_normalization.py +36 -0
- keras_hub/src/layers/modeling/rotary_embedding.py +3 -2
- keras_hub/src/layers/modeling/token_and_position_embedding.py +1 -1
- keras_hub/src/layers/modeling/transformer_decoder.py +8 -6
- keras_hub/src/layers/modeling/transformer_encoder.py +29 -7
- keras_hub/src/layers/preprocessing/audio_converter.py +3 -7
- keras_hub/src/layers/preprocessing/image_converter.py +170 -34
- keras_hub/src/metrics/bleu.py +4 -3
- keras_hub/src/models/albert/albert_presets.py +4 -12
- keras_hub/src/models/albert/albert_text_classifier.py +7 -7
- keras_hub/src/models/backbone.py +3 -14
- keras_hub/src/models/bart/bart_backbone.py +4 -4
- keras_hub/src/models/bart/bart_presets.py +3 -9
- keras_hub/src/models/bart/bart_seq_2_seq_lm.py +9 -8
- keras_hub/src/models/basnet/__init__.py +5 -0
- keras_hub/src/models/basnet/basnet.py +122 -0
- keras_hub/src/models/basnet/basnet_backbone.py +366 -0
- keras_hub/src/models/basnet/basnet_image_converter.py +8 -0
- keras_hub/src/models/basnet/basnet_preprocessor.py +14 -0
- keras_hub/src/models/basnet/basnet_presets.py +17 -0
- keras_hub/src/models/bert/bert_presets.py +14 -32
- keras_hub/src/models/bert/bert_text_classifier.py +3 -3
- keras_hub/src/models/bloom/bloom_presets.py +8 -24
- keras_hub/src/models/causal_lm.py +56 -12
- keras_hub/src/models/clip/__init__.py +5 -0
- keras_hub/src/models/clip/clip_backbone.py +286 -0
- keras_hub/src/models/clip/clip_encoder_block.py +19 -4
- keras_hub/src/models/clip/clip_image_converter.py +8 -0
- keras_hub/src/models/clip/clip_presets.py +93 -0
- keras_hub/src/models/clip/clip_text_encoder.py +4 -1
- keras_hub/src/models/clip/clip_tokenizer.py +18 -3
- keras_hub/src/models/clip/clip_vision_embedding.py +101 -0
- keras_hub/src/models/clip/clip_vision_encoder.py +159 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +2 -1
- keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +0 -109
- keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +1 -1
- keras_hub/src/models/deberta_v3/deberta_v3_presets.py +5 -15
- keras_hub/src/models/deberta_v3/deberta_v3_text_classifier.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +4 -4
- keras_hub/src/models/deberta_v3/disentangled_self_attention.py +3 -2
- keras_hub/src/models/deberta_v3/relative_embedding.py +1 -1
- keras_hub/src/models/deeplab_v3/__init__.py +7 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_backbone.py +200 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_converter.py +10 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_image_segmeter_preprocessor.py +16 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_layers.py +215 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +17 -0
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +111 -0
- keras_hub/src/models/densenet/densenet_backbone.py +6 -4
- keras_hub/src/models/densenet/densenet_image_classifier.py +1 -129
- keras_hub/src/models/densenet/densenet_image_converter.py +2 -4
- keras_hub/src/models/densenet/densenet_presets.py +9 -15
- keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +1 -1
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/distil_bert/distil_bert_presets.py +5 -10
- keras_hub/src/models/distil_bert/distil_bert_text_classifier.py +5 -5
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +3 -3
- keras_hub/src/models/efficientnet/__init__.py +9 -0
- keras_hub/src/models/efficientnet/cba.py +141 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +160 -61
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +14 -0
- keras_hub/src/models/efficientnet/efficientnet_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/efficientnet/efficientnet_image_converter.py +10 -0
- keras_hub/src/models/efficientnet/efficientnet_presets.py +193 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +84 -41
- keras_hub/src/models/efficientnet/mbconv.py +53 -22
- keras_hub/src/models/electra/electra_backbone.py +2 -2
- keras_hub/src/models/electra/electra_presets.py +6 -18
- keras_hub/src/models/f_net/f_net_presets.py +2 -6
- keras_hub/src/models/f_net/f_net_text_classifier.py +3 -3
- keras_hub/src/models/f_net/f_net_text_classifier_preprocessor.py +3 -3
- keras_hub/src/models/falcon/falcon_backbone.py +5 -3
- keras_hub/src/models/falcon/falcon_causal_lm.py +18 -8
- keras_hub/src/models/falcon/falcon_presets.py +1 -3
- keras_hub/src/models/falcon/falcon_tokenizer.py +7 -2
- keras_hub/src/models/feature_pyramid_backbone.py +1 -1
- keras_hub/src/models/flux/__init__.py +5 -0
- keras_hub/src/models/flux/flux_layers.py +496 -0
- keras_hub/src/models/flux/flux_maths.py +225 -0
- keras_hub/src/models/flux/flux_model.py +236 -0
- keras_hub/src/models/flux/flux_presets.py +3 -0
- keras_hub/src/models/flux/flux_text_to_image.py +146 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +73 -0
- keras_hub/src/models/gemma/gemma_backbone.py +35 -20
- keras_hub/src/models/gemma/gemma_causal_lm.py +2 -2
- keras_hub/src/models/gemma/gemma_decoder_block.py +3 -1
- keras_hub/src/models/gemma/gemma_presets.py +29 -63
- keras_hub/src/models/gpt2/gpt2_causal_lm.py +2 -2
- keras_hub/src/models/gpt2/gpt2_presets.py +5 -14
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +2 -1
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +3 -3
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +2 -1
- keras_hub/src/models/image_classifier.py +147 -2
- keras_hub/src/models/image_classifier_preprocessor.py +6 -3
- keras_hub/src/models/image_object_detector.py +87 -0
- keras_hub/src/models/image_object_detector_preprocessor.py +57 -0
- keras_hub/src/models/image_segmenter.py +0 -5
- keras_hub/src/models/image_segmenter_preprocessor.py +29 -4
- keras_hub/src/models/image_to_image.py +417 -0
- keras_hub/src/models/inpaint.py +520 -0
- keras_hub/src/models/llama/llama_backbone.py +138 -12
- keras_hub/src/models/llama/llama_causal_lm.py +3 -1
- keras_hub/src/models/llama/llama_presets.py +10 -20
- keras_hub/src/models/llama3/llama3_backbone.py +12 -11
- keras_hub/src/models/llama3/llama3_causal_lm.py +1 -1
- keras_hub/src/models/llama3/llama3_presets.py +4 -12
- keras_hub/src/models/llama3/llama3_tokenizer.py +25 -2
- keras_hub/src/models/mistral/mistral_backbone.py +16 -15
- keras_hub/src/models/mistral/mistral_causal_lm.py +6 -4
- keras_hub/src/models/mistral/mistral_presets.py +3 -9
- keras_hub/src/models/mistral/mistral_transformer_decoder.py +2 -1
- keras_hub/src/models/mit/__init__.py +6 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_backbone.py → mit/mit_backbone.py} +47 -36
- keras_hub/src/models/mit/mit_image_classifier.py +12 -0
- keras_hub/src/models/mit/mit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/mit/mit_image_converter.py +8 -0
- keras_hub/src/models/{mix_transformer/mix_transformer_layers.py → mit/mit_layers.py} +20 -13
- keras_hub/src/models/mit/mit_presets.py +139 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +8 -8
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +0 -92
- keras_hub/src/models/opt/opt_causal_lm.py +2 -2
- keras_hub/src/models/opt/opt_presets.py +4 -12
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +63 -17
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +3 -1
- keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +21 -23
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +2 -4
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +173 -17
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +14 -26
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -1
- keras_hub/src/models/phi3/phi3_decoder.py +0 -1
- keras_hub/src/models/phi3/phi3_presets.py +2 -6
- keras_hub/src/models/phi3/phi3_rotary_embedding.py +1 -1
- keras_hub/src/models/preprocessor.py +25 -11
- keras_hub/src/models/resnet/resnet_backbone.py +3 -14
- keras_hub/src/models/resnet/resnet_image_classifier.py +0 -137
- keras_hub/src/models/resnet/resnet_image_converter.py +2 -4
- keras_hub/src/models/resnet/resnet_presets.py +127 -18
- keras_hub/src/models/retinanet/__init__.py +5 -0
- keras_hub/src/models/retinanet/anchor_generator.py +52 -53
- keras_hub/src/models/retinanet/feature_pyramid.py +103 -39
- keras_hub/src/models/retinanet/non_max_supression.py +1 -0
- keras_hub/src/models/retinanet/prediction_head.py +192 -0
- keras_hub/src/models/retinanet/retinanet_backbone.py +146 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +53 -0
- keras_hub/src/models/retinanet/retinanet_label_encoder.py +49 -51
- keras_hub/src/models/retinanet/retinanet_object_detector.py +381 -0
- keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/retinanet/retinanet_presets.py +16 -0
- keras_hub/src/models/roberta/roberta_backbone.py +2 -2
- keras_hub/src/models/roberta/roberta_presets.py +6 -8
- keras_hub/src/models/roberta/roberta_text_classifier.py +3 -3
- keras_hub/src/models/sam/__init__.py +5 -0
- keras_hub/src/models/sam/sam_backbone.py +2 -3
- keras_hub/src/models/sam/sam_image_converter.py +2 -4
- keras_hub/src/models/sam/sam_image_segmenter.py +16 -16
- keras_hub/src/models/sam/sam_image_segmenter_preprocessor.py +11 -1
- keras_hub/src/models/sam/sam_layers.py +5 -3
- keras_hub/src/models/sam/sam_presets.py +3 -9
- keras_hub/src/models/sam/sam_prompt_encoder.py +4 -2
- keras_hub/src/models/sam/sam_transformer.py +5 -4
- keras_hub/src/models/segformer/__init__.py +8 -0
- keras_hub/src/models/segformer/segformer_backbone.py +167 -0
- keras_hub/src/models/segformer/segformer_image_converter.py +8 -0
- keras_hub/src/models/segformer/segformer_image_segmenter.py +184 -0
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +31 -0
- keras_hub/src/models/segformer/segformer_presets.py +136 -0
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +1 -1
- keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +8 -1
- keras_hub/src/models/stable_diffusion_3/mmdit.py +577 -190
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +189 -163
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +178 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +193 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +43 -7
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +25 -14
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +1 -1
- keras_hub/src/models/t5/t5_backbone.py +5 -4
- keras_hub/src/models/t5/t5_presets.py +47 -19
- keras_hub/src/models/task.py +47 -39
- keras_hub/src/models/text_classifier.py +2 -2
- keras_hub/src/models/text_to_image.py +106 -41
- keras_hub/src/models/vae/__init__.py +1 -0
- keras_hub/src/models/vae/vae_backbone.py +184 -0
- keras_hub/src/models/vae/vae_layers.py +739 -0
- keras_hub/src/models/vgg/__init__.py +5 -0
- keras_hub/src/models/vgg/vgg_backbone.py +4 -24
- keras_hub/src/models/vgg/vgg_image_classifier.py +139 -33
- keras_hub/src/models/vgg/vgg_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vgg/vgg_image_converter.py +8 -0
- keras_hub/src/models/vgg/vgg_presets.py +48 -0
- keras_hub/src/models/vit/__init__.py +5 -0
- keras_hub/src/models/vit/vit_backbone.py +152 -0
- keras_hub/src/models/vit/vit_image_classifier.py +187 -0
- keras_hub/src/models/vit/vit_image_classifier_preprocessor.py +12 -0
- keras_hub/src/models/vit/vit_image_converter.py +73 -0
- keras_hub/src/models/vit/vit_layers.py +391 -0
- keras_hub/src/models/vit/vit_presets.py +126 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +6 -4
- keras_hub/src/models/vit_det/vit_layers.py +3 -3
- keras_hub/src/models/whisper/whisper_audio_converter.py +2 -4
- keras_hub/src/models/whisper/whisper_backbone.py +6 -5
- keras_hub/src/models/whisper/whisper_decoder.py +3 -5
- keras_hub/src/models/whisper/whisper_presets.py +10 -30
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +1 -1
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +2 -2
- keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +2 -6
- keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier.py +4 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +2 -1
- keras_hub/src/models/xlnet/relative_attention.py +20 -19
- keras_hub/src/models/xlnet/xlnet_backbone.py +2 -2
- keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +3 -5
- keras_hub/src/models/xlnet/xlnet_encoder.py +7 -9
- keras_hub/src/samplers/contrastive_sampler.py +2 -3
- keras_hub/src/samplers/sampler.py +2 -1
- keras_hub/src/tests/test_case.py +41 -6
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +7 -3
- keras_hub/src/tokenizers/byte_tokenizer.py +3 -10
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +2 -9
- keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +9 -11
- keras_hub/src/tokenizers/tokenizer.py +10 -13
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +9 -7
- keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +10 -3
- keras_hub/src/utils/keras_utils.py +2 -13
- keras_hub/src/utils/pipeline_model.py +3 -3
- keras_hub/src/utils/preset_utils.py +196 -144
- keras_hub/src/utils/tensor_utils.py +4 -4
- keras_hub/src/utils/timm/convert_densenet.py +6 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +447 -0
- keras_hub/src/utils/timm/convert_resnet.py +1 -1
- keras_hub/src/utils/timm/convert_vgg.py +85 -0
- keras_hub/src/utils/timm/preset_loader.py +14 -9
- keras_hub/src/utils/transformers/convert_llama3.py +21 -5
- keras_hub/src/utils/transformers/convert_vit.py +150 -0
- keras_hub/src/utils/transformers/preset_loader.py +23 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +4 -3
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/METADATA +86 -68
- keras_hub_nightly-0.19.0.dev202501260345.dist-info/RECORD +423 -0
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/WHEEL +1 -1
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +0 -138
- keras_hub/src/models/mix_transformer/__init__.py +0 -0
- keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +0 -119
- keras_hub/src/models/stable_diffusion_3/vae_image_decoder.py +0 -320
- keras_hub_nightly-0.16.1.dev202410020340.dist-info/RECORD +0 -357
- {keras_hub_nightly-0.16.1.dev202410020340.dist-info → keras_hub_nightly-0.19.0.dev202501260345.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,9 @@ backbone_presets = {
|
|
8
8
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
9
9
|
),
|
10
10
|
"params": 13548800,
|
11
|
-
"official_name": "ELECTRA",
|
12
11
|
"path": "electra",
|
13
|
-
"model_card": "https://github.com/google-research/electra",
|
14
12
|
},
|
15
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_small_discriminator_uncased_en/
|
13
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_small_discriminator_uncased_en/2",
|
16
14
|
},
|
17
15
|
"electra_small_generator_uncased_en": {
|
18
16
|
"metadata": {
|
@@ -21,11 +19,9 @@ backbone_presets = {
|
|
21
19
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
22
20
|
),
|
23
21
|
"params": 13548800,
|
24
|
-
"official_name": "ELECTRA",
|
25
22
|
"path": "electra",
|
26
|
-
"model_card": "https://github.com/google-research/electra",
|
27
23
|
},
|
28
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_small_generator_uncased_en/
|
24
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_small_generator_uncased_en/2",
|
29
25
|
},
|
30
26
|
"electra_base_discriminator_uncased_en": {
|
31
27
|
"metadata": {
|
@@ -34,11 +30,9 @@ backbone_presets = {
|
|
34
30
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
35
31
|
),
|
36
32
|
"params": 109482240,
|
37
|
-
"official_name": "ELECTRA",
|
38
33
|
"path": "electra",
|
39
|
-
"model_card": "https://github.com/google-research/electra",
|
40
34
|
},
|
41
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_base_discriminator_uncased_en/
|
35
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_base_discriminator_uncased_en/2",
|
42
36
|
},
|
43
37
|
"electra_base_generator_uncased_en": {
|
44
38
|
"metadata": {
|
@@ -47,11 +41,9 @@ backbone_presets = {
|
|
47
41
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
48
42
|
),
|
49
43
|
"params": 33576960,
|
50
|
-
"official_name": "ELECTRA",
|
51
44
|
"path": "electra",
|
52
|
-
"model_card": "https://github.com/google-research/electra",
|
53
45
|
},
|
54
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_base_generator_uncased_en/
|
46
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_base_generator_uncased_en/2",
|
55
47
|
},
|
56
48
|
"electra_large_discriminator_uncased_en": {
|
57
49
|
"metadata": {
|
@@ -60,11 +52,9 @@ backbone_presets = {
|
|
60
52
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
61
53
|
),
|
62
54
|
"params": 335141888,
|
63
|
-
"official_name": "ELECTRA",
|
64
55
|
"path": "electra",
|
65
|
-
"model_card": "https://github.com/google-research/electra",
|
66
56
|
},
|
67
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_large_discriminator_uncased_en/
|
57
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_large_discriminator_uncased_en/2",
|
68
58
|
},
|
69
59
|
"electra_large_generator_uncased_en": {
|
70
60
|
"metadata": {
|
@@ -73,10 +63,8 @@ backbone_presets = {
|
|
73
63
|
"lowercased. Trained on English Wikipedia + BooksCorpus."
|
74
64
|
),
|
75
65
|
"params": 51065344,
|
76
|
-
"official_name": "ELECTRA",
|
77
66
|
"path": "electra",
|
78
|
-
"model_card": "https://github.com/google-research/electra",
|
79
67
|
},
|
80
|
-
"kaggle_handle": "kaggle://keras/electra/keras/electra_large_generator_uncased_en/
|
68
|
+
"kaggle_handle": "kaggle://keras/electra/keras/electra_large_generator_uncased_en/2",
|
81
69
|
},
|
82
70
|
}
|
@@ -8,11 +8,9 @@ backbone_presets = {
|
|
8
8
|
"Trained on the C4 dataset."
|
9
9
|
),
|
10
10
|
"params": 82861056,
|
11
|
-
"official_name": "FNet",
|
12
11
|
"path": "f_net",
|
13
|
-
"model_card": "https://github.com/google-research/google-research/blob/master/f_net/README.md",
|
14
12
|
},
|
15
|
-
"kaggle_handle": "kaggle://keras/f_net/keras/f_net_base_en/
|
13
|
+
"kaggle_handle": "kaggle://keras/f_net/keras/f_net_base_en/3",
|
16
14
|
},
|
17
15
|
"f_net_large_en": {
|
18
16
|
"metadata": {
|
@@ -21,10 +19,8 @@ backbone_presets = {
|
|
21
19
|
"Trained on the C4 dataset."
|
22
20
|
),
|
23
21
|
"params": 236945408,
|
24
|
-
"official_name": "FNet",
|
25
22
|
"path": "f_net",
|
26
|
-
"model_card": "https://github.com/google-research/google-research/blob/master/f_net/README.md",
|
27
23
|
},
|
28
|
-
"kaggle_handle": "kaggle://keras/f_net/keras/f_net_large_en/
|
24
|
+
"kaggle_handle": "kaggle://keras/f_net/keras/f_net_large_en/3",
|
29
25
|
},
|
30
26
|
}
|
@@ -34,9 +34,9 @@ class FNetTextClassifier(TextClassifier):
|
|
34
34
|
Args:
|
35
35
|
backbone: A `keras_hub.models.FNetBackbone` instance.
|
36
36
|
num_classes: int. Number of classes to predict.
|
37
|
-
preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or
|
38
|
-
`None`, this model will not apply preprocessing, and
|
39
|
-
be preprocessed before calling the model.
|
37
|
+
preprocessor: A `keras_hub.models.FNetTextClassifierPreprocessor` or
|
38
|
+
`None`. If `None`, this model will not apply preprocessing, and
|
39
|
+
inputs should be preprocessed before calling the model.
|
40
40
|
activation: Optional `str` or callable. The
|
41
41
|
activation function to use on the model outputs. Set
|
42
42
|
`activation="softmax"` to return output probabilities.
|
@@ -22,9 +22,9 @@ class FNetTextClassifierPreprocessor(TextClassifierPreprocessor):
|
|
22
22
|
|
23
23
|
1. Tokenize any number of input segments using the `tokenizer`.
|
24
24
|
2. Pack the inputs together using a `keras_hub.layers.MultiSegmentPacker`.
|
25
|
-
|
26
|
-
3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"`
|
27
|
-
|
25
|
+
with the appropriate `"[CLS]"`, `"[SEP]"` and `"<pad>"` tokens.
|
26
|
+
3. Construct a dictionary with keys `"token_ids"`, and `"segment_ids"`
|
27
|
+
that can be passed directly to `keras_hub.models.FNetBackbone`.
|
28
28
|
|
29
29
|
This layer can be used directly with `tf.data.Dataset.map` to preprocess
|
30
30
|
string data in the `(x, y, sample_weight)` format used by
|
@@ -20,15 +20,17 @@ class FalconBackbone(Backbone):
|
|
20
20
|
Args:
|
21
21
|
vocabulary_size: int. The size of the token vocabulary.
|
22
22
|
num_layers: int. The number of transformer layers.
|
23
|
-
num_attention_heads: int. The number of attention heads for each
|
24
|
-
The hidden size must be divisible by the number of
|
23
|
+
num_attention_heads: int. The number of attention heads for each
|
24
|
+
transformer. The hidden size must be divisible by the number of
|
25
|
+
attention heads.
|
25
26
|
hidden_dim: int. The dimensionality of the embeddings and hidden states.
|
26
27
|
intermediate_dim: int. The output dimension of the first Dense layer in
|
27
28
|
the MLP network of each transformer.
|
28
29
|
layer_norm_epsilon: float. Epsilon for the layer normalization layers in
|
29
30
|
the transformer decoder.
|
30
31
|
attention_dropout_rate: float. Dropout probability for the attention.
|
31
|
-
feedforward_dropout_rate: float. Dropout probability for the
|
32
|
+
feedforward_dropout_rate: float. Dropout probability for the
|
33
|
+
feedforward.
|
32
34
|
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
|
33
35
|
for model computations and weights. Note that some computations,
|
34
36
|
such as softmax and layer normalization, will always be done at
|
@@ -40,7 +40,9 @@ class FalconCausalLM(CausalLM):
|
|
40
40
|
|
41
41
|
Use `generate()` to do text generation.
|
42
42
|
```python
|
43
|
-
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
43
|
+
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
44
|
+
"falcon_refinedweb_1b_en"
|
45
|
+
)
|
44
46
|
falcon_lm.generate("I want to say", max_length=30)
|
45
47
|
|
46
48
|
# Generate with batched prompts.
|
@@ -49,7 +51,9 @@ class FalconCausalLM(CausalLM):
|
|
49
51
|
|
50
52
|
Compile the `generate()` function with a custom sampler.
|
51
53
|
```python
|
52
|
-
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
54
|
+
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
55
|
+
"falcon_refinedweb_1b_en"
|
56
|
+
)
|
53
57
|
falcon_lm.compile(sampler="top_k")
|
54
58
|
falcon_lm.generate("I want to say", max_length=30)
|
55
59
|
|
@@ -60,7 +64,8 @@ class FalconCausalLM(CausalLM):
|
|
60
64
|
Use `generate()` without preprocessing.
|
61
65
|
```python
|
62
66
|
prompt = {
|
63
|
-
# Token ids for
|
67
|
+
# Token ids for
|
68
|
+
# "<|endoftext|> Keras is".
|
64
69
|
"token_ids": np.array([[50256, 17337, 292, 318]] * 2),
|
65
70
|
# Use `"padding_mask"` to indicate values that should not be overridden.
|
66
71
|
"padding_mask": np.array([[1, 1, 1, 1]] * 2),
|
@@ -76,15 +81,20 @@ class FalconCausalLM(CausalLM):
|
|
76
81
|
Call `fit()` on a single batch.
|
77
82
|
```python
|
78
83
|
features = ["The quick brown fox jumped.", "I forgot my homework."]
|
79
|
-
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
84
|
+
falcon_lm = keras_hub.models.FalconCausalLM.from_preset(
|
85
|
+
"falcon_refinedweb_1b_en"
|
86
|
+
)
|
80
87
|
falcon_lm.fit(x=features, batch_size=2)
|
81
88
|
```
|
82
89
|
|
83
90
|
Call `fit()` without preprocessing.
|
84
91
|
```python
|
85
92
|
x = {
|
86
|
-
# Token ids for
|
87
|
-
"
|
93
|
+
# Token ids for
|
94
|
+
# "<|endoftext|> Keras is deep learning library<|endoftext|>"
|
95
|
+
"token_ids": np.array(
|
96
|
+
[[50256, 17337, 292, 318, 2769,4673,5888, 50256, 0]] * 2
|
97
|
+
),
|
88
98
|
"padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 0]] * 2),
|
89
99
|
}
|
90
100
|
y = np.array([[17337, 292, 318, 2769, 4673, 5888, 50256, 0, 0]] * 2)
|
@@ -164,8 +174,8 @@ class FalconCausalLM(CausalLM):
|
|
164
174
|
Args:
|
165
175
|
token_ids: a dense int Tensor with shape `(batch_size, max_length)`.
|
166
176
|
cache: a dense float Tensor, the cache of key and value.
|
167
|
-
cache_update_index: int, or int Tensor. The index of current inputs
|
168
|
-
whole sequence.
|
177
|
+
cache_update_index: int, or int Tensor. The index of current inputs
|
178
|
+
in the whole sequence.
|
169
179
|
|
170
180
|
Returns:
|
171
181
|
A (logits, hidden_states, cache) tuple. Where `logits` is the
|
@@ -8,10 +8,8 @@ backbone_presets = {
|
|
8
8
|
"350B tokens of RefinedWeb dataset."
|
9
9
|
),
|
10
10
|
"params": 1311625216,
|
11
|
-
"official_name": "Falcon",
|
12
11
|
"path": "falcon",
|
13
|
-
"model_card": "https://huggingface.co/tiiuae/falcon-rw-1b",
|
14
12
|
},
|
15
|
-
"kaggle_handle": "kaggle://keras/falcon/keras/falcon_refinedweb_1b_en/
|
13
|
+
"kaggle_handle": "kaggle://keras/falcon/keras/falcon_refinedweb_1b_en/2",
|
16
14
|
},
|
17
15
|
}
|
@@ -36,7 +36,9 @@ class FalconTokenizer(BytePairTokenizer):
|
|
36
36
|
|
37
37
|
```python
|
38
38
|
# Unbatched input.
|
39
|
-
tokenizer = keras_hub.models.FalconTokenizer.from_preset(
|
39
|
+
tokenizer = keras_hub.models.FalconTokenizer.from_preset(
|
40
|
+
"falcon_refinedweb_1b_en"
|
41
|
+
)
|
40
42
|
tokenizer("The quick brown fox jumped.")
|
41
43
|
|
42
44
|
# Batched input.
|
@@ -49,7 +51,10 @@ class FalconTokenizer(BytePairTokenizer):
|
|
49
51
|
vocab = {"<|endoftext|>": 0, "a": 4, "Ġquick": 5, "Ġfox": 6}
|
50
52
|
merges = ["Ġ q", "u i", "c k", "ui ck", "Ġq uick"]
|
51
53
|
merges += ["Ġ f", "o x", "Ġf ox"]
|
52
|
-
tokenizer = keras_hub.models.FalconTokenizer(
|
54
|
+
tokenizer = keras_hub.models.FalconTokenizer(
|
55
|
+
vocabulary=vocab,
|
56
|
+
merges=merges,
|
57
|
+
)
|
53
58
|
tokenizer("a quick fox.")
|
54
59
|
```
|
55
60
|
"""
|
@@ -15,7 +15,7 @@ class FeaturePyramidBackbone(Backbone):
|
|
15
15
|
Example:
|
16
16
|
|
17
17
|
```python
|
18
|
-
input_data = np.random.uniform(0,
|
18
|
+
input_data = np.random.uniform(0, 256, size=(2, 224, 224, 3))
|
19
19
|
|
20
20
|
# Convert to feature pyramid output format using ResNet.
|
21
21
|
backbone = ResNetBackbone.from_preset("resnet50")
|