keras-nlp 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/PKG-INFO +1 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/__init__.py +1 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/models/__init__.py +24 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/__init__.py +1 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/conftest.py +18 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/alibi_bias.py +7 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/f_net_encoder.py +4 -5
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/masked_lm_head.py +3 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/position_embedding.py +3 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/reversible_embedding.py +4 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/rotary_embedding.py +31 -18
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/sine_position_encoding.py +3 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/token_and_position_embedding.py +6 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_decoder.py +6 -9
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_encoder.py +3 -3
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/__init__.py +22 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/albert/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_backbone.py +2 -9
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_classifier.py +5 -29
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_masked_lm.py +6 -27
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_preprocessor.py +2 -12
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_tokenizer.py +0 -8
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/backbone.py +100 -42
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/bart/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_backbone.py +1 -8
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_seq_2_seq_lm.py +16 -35
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/bert/__init__.py +24 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_backbone.py +1 -8
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_classifier.py +5 -30
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_masked_lm.py +6 -29
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_preprocessor.py +2 -15
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_tokenizer.py +15 -19
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_backbone.py +4 -16
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/bloom_causal_lm.py +298 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -6
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_decoder.py +15 -19
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_preprocessor.py +6 -15
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/bloom_presets.py +122 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_tokenizer.py +11 -18
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/generative_task.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/causal_lm.py +137 -28
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/classifier.py +113 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/deberta_v3/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_classifier.py +5 -28
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm.py +6 -26
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/distil_bert/__init__.py +23 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_classifier.py +5 -28
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_masked_lm.py +6 -26
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_tokenizer.py +15 -15
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/electra/electra_backbone.py +14 -4
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/electra_preprocessor.py +154 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/electra_presets.py +96 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/electra/electra_tokenizer.py +29 -13
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/f_net/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_classifier.py +5 -28
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_masked_lm.py +6 -26
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/__init__.py +21 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_attention.py +157 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_backbone.py +162 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_causal_lm_preprocessor.py +179 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_preprocessor.py +187 -0
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/bloom/bloom_presets.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_presets.py +10 -10
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_tokenizer.py +111 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_transformer_decoder.py +255 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/gemma/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_attention.py +17 -26
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_backbone.py +26 -17
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_causal_lm.py +40 -35
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_causal_lm_preprocessor.py +3 -6
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_tokenizer.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/rms_normalization.py +1 -1
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/gpt2/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_causal_lm.py +147 -35
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_causal_lm_preprocessor.py +2 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_tokenizer.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +16 -29
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +2 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +2 -5
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_attention.py +76 -62
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_backbone.py +64 -38
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_causal_lm.py +334 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_causal_lm_preprocessor.py +186 -0
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/mistral/mistral_transformer_decoder.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_decoder.py +19 -23
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_layernorm.py +21 -10
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_preprocessor.py +189 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_presets.py +39 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_tokenizer.py +8 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/masked_lm.py +105 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/mistral/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_attention.py +0 -5
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_backbone.py +2 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_causal_lm.py +141 -34
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_causal_lm_preprocessor.py +2 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_layer_norm.py +11 -10
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_presets.py +10 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_tokenizer.py +1 -8
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/llama/llama_decoder.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/mistral/mistral_transformer_decoder.py +81 -33
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/opt/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_causal_lm.py +16 -36
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_causal_lm_preprocessor.py +2 -2
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/preprocessor.py +156 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/roberta/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_classifier.py +5 -28
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_masked_lm.py +5 -25
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/seq_2_seq_lm.py +55 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/t5/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_tokenizer.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/task.py +107 -104
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/whisper/__init__.py +21 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_audio_feature_extractor.py +0 -53
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_backbone.py +1 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_preprocessor.py +2 -15
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_tokenizer.py +0 -7
- keras-nlp-0.9.0.dev0/keras_nlp/src/models/xlm_roberta/__init__.py +23 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_backbone.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_classifier.py +5 -28
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_masked_lm.py +6 -26
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_preprocessor.py +2 -11
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_tokenizer.py +0 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_backbone.py +2 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/beam_sampler.py +5 -4
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/contrastive_sampler.py +5 -5
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/sampler.py +6 -5
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tests/test_case.py +4 -6
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/byte_pair_tokenizer.py +12 -75
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/byte_tokenizer.py +7 -7
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/sentence_piece_tokenizer.py +10 -73
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/tokenizer.py +95 -10
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/unicode_codepoint_tokenizer.py +9 -9
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/word_piece_tokenizer.py +97 -82
- keras-nlp-0.9.0.dev0/keras_nlp/src/utils/preset_utils.py +412 -0
- keras-nlp-0.9.0.dev0/keras_nlp/src/utils/python_utils.py +22 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/tensor_utils.py +27 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/version_utils.py +1 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/PKG-INFO +1 -1
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/SOURCES.txt +19 -1
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/bart/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/distil_bert/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/electra/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/f_net/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/gemma/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/gpt2/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/gpt_neo_x/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/llama/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/mistral/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/opt/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/preprocessor.py +0 -120
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/roberta/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/t5/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/whisper/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/xlm_roberta/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/models/xlnet/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/tests/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/utils/__init__.py +0 -14
- keras-nlp-0.8.2.dev0/keras_nlp/src/utils/preset_utils.py +0 -219
- keras-nlp-0.8.2.dev0/keras_nlp/src/utils/python_utils.py +0 -47
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/README.md +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/layers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/metrics/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/samplers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/api_export.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/config.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/keras.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/ops.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/random.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/cached_multi_head_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_layer_utils.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/masked_lm_mask_generator.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/multi_segment_packer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/preprocessing_layer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/random_deletion.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/random_swap.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/start_end_packer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/bleu.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/edit_distance.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/perplexity.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_base.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_l.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_n.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/disentangled_attention_encoder.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/disentangled_self_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/relative_embedding.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_decoder_block.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_presets.py +0 -0
- {keras-nlp-0.8.2.dev0/keras_nlp/src/models/albert → keras-nlp-0.9.0.dev0/keras_nlp/src/models/gpt_neo_x}/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_decoder.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_layer_norm.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_multi_head_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_transformer_layer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_cached_multi_head_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_decoder.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_encoder.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_presets.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_presets.py +0 -0
- {keras-nlp-0.8.2.dev0/keras_nlp/src/models/bert → keras-nlp-0.9.0.dev0/keras_nlp/src/models/xlnet}/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/relative_attention.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_content_and_query_embedding.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_encoder.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/greedy_sampler.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/random_sampler.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/serialization.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/top_k_sampler.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/top_p_sampler.py +0 -0
- {keras-nlp-0.8.2.dev0/keras_nlp/src/models/bloom → keras-nlp-0.9.0.dev0/keras_nlp/src/tests}/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/sentence_piece_tokenizer_trainer.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/word_piece_tokenizer_trainer.py +0 -0
- {keras-nlp-0.8.2.dev0/keras_nlp/src/models/deberta_v3 → keras-nlp-0.9.0.dev0/keras_nlp/src/utils}/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/keras_utils.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/pipeline_model.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/tokenizers/__init__.py +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/dependency_links.txt +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/requires.txt +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/top_level.txt +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/setup.cfg +0 -0
- {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/setup.py +0 -0
|
@@ -10,5 +10,6 @@ from keras_nlp import metrics
|
|
|
10
10
|
from keras_nlp import models
|
|
11
11
|
from keras_nlp import samplers
|
|
12
12
|
from keras_nlp import tokenizers
|
|
13
|
+
from keras_nlp.src.utils.preset_utils import upload_preset
|
|
13
14
|
from keras_nlp.src.version_utils import version
|
|
14
15
|
from keras_nlp.src.version_utils import __version__
|
|
@@ -11,6 +11,7 @@ from keras_nlp.src.models.albert.albert_masked_lm import AlbertMaskedLM
|
|
|
11
11
|
from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import AlbertMaskedLMPreprocessor
|
|
12
12
|
from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
|
|
13
13
|
from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
|
|
14
|
+
from keras_nlp.src.models.backbone import Backbone
|
|
14
15
|
from keras_nlp.src.models.bart.bart_backbone import BartBackbone
|
|
15
16
|
from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
|
|
16
17
|
from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
|
|
@@ -22,6 +23,13 @@ from keras_nlp.src.models.bert.bert_masked_lm import BertMaskedLM
|
|
|
22
23
|
from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import BertMaskedLMPreprocessor
|
|
23
24
|
from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
|
|
24
25
|
from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
|
|
26
|
+
from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
|
|
27
|
+
from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
|
|
28
|
+
from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import BloomCausalLMPreprocessor
|
|
29
|
+
from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
|
|
30
|
+
from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
|
|
31
|
+
from keras_nlp.src.models.causal_lm import CausalLM
|
|
32
|
+
from keras_nlp.src.models.classifier import Classifier
|
|
25
33
|
from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
|
|
26
34
|
from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import DebertaV3Classifier
|
|
27
35
|
from keras_nlp.src.models.deberta_v3.deberta_v3_masked_lm import DebertaV3MaskedLM
|
|
@@ -34,12 +42,19 @@ from keras_nlp.src.models.distil_bert.distil_bert_masked_lm import DistilBertMas
|
|
|
34
42
|
from keras_nlp.src.models.distil_bert.distil_bert_masked_lm_preprocessor import DistilBertMaskedLMPreprocessor
|
|
35
43
|
from keras_nlp.src.models.distil_bert.distil_bert_preprocessor import DistilBertPreprocessor
|
|
36
44
|
from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import DistilBertTokenizer
|
|
45
|
+
from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
|
|
46
|
+
from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
|
|
47
|
+
from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
|
|
37
48
|
from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
|
|
38
49
|
from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
|
|
39
50
|
from keras_nlp.src.models.f_net.f_net_masked_lm import FNetMaskedLM
|
|
40
51
|
from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import FNetMaskedLMPreprocessor
|
|
41
52
|
from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
|
|
42
53
|
from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
|
|
54
|
+
from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
|
|
55
|
+
from keras_nlp.src.models.falcon.falcon_causal_lm_preprocessor import FalconCausalLMPreprocessor
|
|
56
|
+
from keras_nlp.src.models.falcon.falcon_preprocessor import FalconPreprocessor
|
|
57
|
+
from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
|
|
43
58
|
from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
|
|
44
59
|
from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
|
|
45
60
|
from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import GemmaCausalLMPreprocessor
|
|
@@ -50,7 +65,12 @@ from keras_nlp.src.models.gpt2.gpt2_causal_lm import GPT2CausalLM
|
|
|
50
65
|
from keras_nlp.src.models.gpt2.gpt2_causal_lm_preprocessor import GPT2CausalLMPreprocessor
|
|
51
66
|
from keras_nlp.src.models.gpt2.gpt2_preprocessor import GPT2Preprocessor
|
|
52
67
|
from keras_nlp.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
|
|
68
|
+
from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
|
|
69
|
+
from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
|
|
70
|
+
from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import LlamaCausalLMPreprocessor
|
|
71
|
+
from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
|
|
53
72
|
from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
|
|
73
|
+
from keras_nlp.src.models.masked_lm import MaskedLM
|
|
54
74
|
from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
|
|
55
75
|
from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
|
|
56
76
|
from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import MistralCausalLMPreprocessor
|
|
@@ -61,15 +81,19 @@ from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
|
|
|
61
81
|
from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import OPTCausalLMPreprocessor
|
|
62
82
|
from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
|
|
63
83
|
from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
|
|
84
|
+
from keras_nlp.src.models.preprocessor import Preprocessor
|
|
64
85
|
from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
|
|
65
86
|
from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
|
|
66
87
|
from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
|
|
67
88
|
from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import RobertaMaskedLMPreprocessor
|
|
68
89
|
from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
|
|
69
90
|
from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
|
|
91
|
+
from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
|
|
92
|
+
from keras_nlp.src.models.task import Task
|
|
70
93
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_backbone import XLMRobertaBackbone
|
|
71
94
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_classifier import XLMRobertaClassifier
|
|
72
95
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm import XLMRobertaMaskedLM
|
|
73
96
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm_preprocessor import XLMRobertaMaskedLMPreprocessor
|
|
74
97
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_preprocessor import XLMRobertaPreprocessor
|
|
75
98
|
from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import XLMRobertaTokenizer
|
|
99
|
+
from keras_nlp.src.tokenizers.tokenizer import Tokenizer
|
|
@@ -26,6 +26,7 @@ from keras_nlp.src import models
|
|
|
26
26
|
from keras_nlp.src import samplers
|
|
27
27
|
from keras_nlp.src import tokenizers
|
|
28
28
|
from keras_nlp.src import utils
|
|
29
|
+
from keras_nlp.src.utils.preset_utils import upload_preset
|
|
29
30
|
from keras_nlp.src.version_utils import __version__
|
|
30
31
|
from keras_nlp.src.version_utils import version
|
|
31
32
|
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
16
|
+
|
|
15
17
|
import pytest
|
|
16
18
|
import tensorflow as tf
|
|
17
19
|
|
|
@@ -83,6 +85,10 @@ def pytest_configure(config):
|
|
|
83
85
|
"markers",
|
|
84
86
|
"keras_3_only: mark test as a keras 3 only test",
|
|
85
87
|
)
|
|
88
|
+
config.addinivalue_line(
|
|
89
|
+
"markers",
|
|
90
|
+
"kaggle_key_required: mark test needing a kaggle key",
|
|
91
|
+
)
|
|
86
92
|
|
|
87
93
|
|
|
88
94
|
def pytest_collection_modifyitems(config, items):
|
|
@@ -107,6 +113,16 @@ def pytest_collection_modifyitems(config, items):
|
|
|
107
113
|
not backend_config.keras_3(),
|
|
108
114
|
reason="tests only run on with multi-backend keras",
|
|
109
115
|
)
|
|
116
|
+
found_kaggle_key = all(
|
|
117
|
+
[
|
|
118
|
+
os.environ.get("KAGGLE_USERNAME", None),
|
|
119
|
+
os.environ.get("KAGGLE_KEY", None),
|
|
120
|
+
]
|
|
121
|
+
)
|
|
122
|
+
kaggle_key_required = pytest.mark.skipif(
|
|
123
|
+
not found_kaggle_key,
|
|
124
|
+
reason="tests only run with a kaggle api key",
|
|
125
|
+
)
|
|
110
126
|
for item in items:
|
|
111
127
|
if "large" in item.keywords:
|
|
112
128
|
item.add_marker(skip_large)
|
|
@@ -116,6 +132,8 @@ def pytest_collection_modifyitems(config, items):
|
|
|
116
132
|
item.add_marker(tf_only)
|
|
117
133
|
if "keras_3_only" in item.keywords:
|
|
118
134
|
item.add_marker(keras_3_only)
|
|
135
|
+
if "kaggle_key_required" in item.keywords:
|
|
136
|
+
item.add_marker(kaggle_key_required)
|
|
119
137
|
|
|
120
138
|
|
|
121
139
|
# Disable traceback filtering for quicker debugging of test failures.
|
|
@@ -35,12 +35,15 @@ class AlibiBias(keras.layers.Layer):
|
|
|
35
35
|
each head. The heads' slopes are a geometric sequence that starts at
|
|
36
36
|
`2**(-alibi_bias_max/num_heads)` and uses that same value as its
|
|
37
37
|
ratio. Defaults to 8.
|
|
38
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
39
|
+
including `name`, `trainable`, `dtype` etc.
|
|
40
|
+
|
|
38
41
|
Call arguments:
|
|
39
42
|
attention_scores: The result of multiplying the query and the key of the
|
|
40
43
|
multi-head attention layer of the transformer to add alibi bias to
|
|
41
44
|
it. With shape `(batch_size, num_heads, query_length, key_length)`.
|
|
42
45
|
|
|
43
|
-
|
|
46
|
+
Example:
|
|
44
47
|
```python
|
|
45
48
|
query_length = 10
|
|
46
49
|
key_length = 10
|
|
@@ -94,7 +97,9 @@ class AlibiBias(keras.layers.Layer):
|
|
|
94
97
|
)
|
|
95
98
|
slopes = ops.expand_dims(slopes, 1)
|
|
96
99
|
|
|
97
|
-
seq_range = ops.expand_dims(
|
|
100
|
+
seq_range = ops.expand_dims(
|
|
101
|
+
ops.arange(1 - key_length, 1, dtype="int32"), 0
|
|
102
|
+
)
|
|
98
103
|
seq_range = ops.cast(seq_range, dtype=self.compute_dtype)
|
|
99
104
|
|
|
100
105
|
alibi_bias = ops.multiply(slopes, seq_range)
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/f_net_encoder.py
RENAMED
|
@@ -47,10 +47,10 @@ class FNetEncoder(keras.layers.Layer):
|
|
|
47
47
|
bias_initializer: "string" or `keras.initializers` initializer.
|
|
48
48
|
The bias initializer for the dense layers.
|
|
49
49
|
Defaults to `"zeros"`.
|
|
50
|
-
|
|
51
|
-
|
|
50
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
51
|
+
including `name`, `trainable`, `dtype` etc.
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
Example:
|
|
54
54
|
|
|
55
55
|
```python
|
|
56
56
|
# Create a single FNet encoder layer.
|
|
@@ -79,10 +79,9 @@ class FNetEncoder(keras.layers.Layer):
|
|
|
79
79
|
layer_norm_epsilon=1e-5,
|
|
80
80
|
kernel_initializer="glorot_uniform",
|
|
81
81
|
bias_initializer="zeros",
|
|
82
|
-
name=None,
|
|
83
82
|
**kwargs
|
|
84
83
|
):
|
|
85
|
-
super().__init__(
|
|
84
|
+
super().__init__(**kwargs)
|
|
86
85
|
self.intermediate_dim = intermediate_dim
|
|
87
86
|
self.dropout = dropout
|
|
88
87
|
self.activation = keras.activations.get(activation)
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/masked_lm_head.py
RENAMED
|
@@ -59,8 +59,10 @@ class MaskedLMHead(keras.layers.Layer):
|
|
|
59
59
|
bias_initializer: string or `keras.initializers` initializer.
|
|
60
60
|
The bias initializer for the dense and multiheaded
|
|
61
61
|
attention layers. Defaults to `"zeros"`.
|
|
62
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
63
|
+
including `name`, `trainable`, `dtype` etc.
|
|
62
64
|
|
|
63
|
-
|
|
65
|
+
Example:
|
|
64
66
|
|
|
65
67
|
```python
|
|
66
68
|
batch_size = 16
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/position_embedding.py
RENAMED
|
@@ -33,6 +33,8 @@ class PositionEmbedding(keras.layers.Layer):
|
|
|
33
33
|
initializer: The initializer to use for the embedding weights. Defaults
|
|
34
34
|
to `"glorot_uniform"`.
|
|
35
35
|
seq_axis: The axis of the input tensor where we add the embeddings.
|
|
36
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
37
|
+
including `name`, `trainable`, `dtype` etc.
|
|
36
38
|
|
|
37
39
|
Call arguments:
|
|
38
40
|
inputs: The tensor inputs to compute an embedding for, with shape
|
|
@@ -43,7 +45,7 @@ class PositionEmbedding(keras.layers.Layer):
|
|
|
43
45
|
compute the position embedding from. This is useful during cached
|
|
44
46
|
decoding, where each position is predicted separately in a loop.
|
|
45
47
|
|
|
46
|
-
|
|
48
|
+
Example:
|
|
47
49
|
|
|
48
50
|
Called directly on input.
|
|
49
51
|
>>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10)
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/reversible_embedding.py
RENAMED
|
@@ -52,6 +52,8 @@ class ReversibleEmbedding(keras.layers.Embedding):
|
|
|
52
52
|
reverse_dtype: The dtype for the reverse projection computation.
|
|
53
53
|
For stability, it is usually best to use full precision even when
|
|
54
54
|
working with half or mixed precision training.
|
|
55
|
+
**kwargs: other keyword arguments passed to `keras.layers.Embedding`,
|
|
56
|
+
including `name`, `trainable`, `dtype` etc.
|
|
55
57
|
|
|
56
58
|
Call arguments:
|
|
57
59
|
inputs: The tensor inputs to the layer.
|
|
@@ -59,7 +61,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
|
|
|
59
61
|
from `output_dim` to `input_dim`, instead of a normal embedding
|
|
60
62
|
call. Defaults to `False`.
|
|
61
63
|
|
|
62
|
-
|
|
64
|
+
Example:
|
|
63
65
|
```python
|
|
64
66
|
batch_size = 16
|
|
65
67
|
vocab_size = 100
|
|
@@ -73,7 +75,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
|
|
|
73
75
|
# Embed tokens to shape `(batch_size, seq_length, hidden_dim)`.
|
|
74
76
|
hidden_states = embedding(token_ids)
|
|
75
77
|
# Project hidden states to shape `(batch_size, seq_length, vocab_size)`.
|
|
76
|
-
logits = embedding(
|
|
78
|
+
logits = embedding(hidden_states, reverse=True)
|
|
77
79
|
```
|
|
78
80
|
|
|
79
81
|
References:
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/rotary_embedding.py
RENAMED
|
@@ -38,6 +38,8 @@ class RotaryEmbedding(keras.layers.Layer):
|
|
|
38
38
|
scaling_factor: float. The scaling factor used to scale frequency range.
|
|
39
39
|
sequence_axis: int. Sequence axis in the input tensor.
|
|
40
40
|
feature_axis: int. Feature axis in the input tensor.
|
|
41
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
42
|
+
including `name`, `trainable`, `dtype` etc.
|
|
41
43
|
|
|
42
44
|
Call arguments:
|
|
43
45
|
inputs: The tensor inputs to apply the embedding to. This can have
|
|
@@ -85,30 +87,42 @@ class RotaryEmbedding(keras.layers.Layer):
|
|
|
85
87
|
self.built = True
|
|
86
88
|
|
|
87
89
|
def call(self, inputs, start_index=0):
|
|
90
|
+
inputs = ops.moveaxis(
|
|
91
|
+
inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
|
|
92
|
+
)
|
|
88
93
|
cos_emb, sin_emb = self._compute_cos_sin_embedding(inputs, start_index)
|
|
89
|
-
|
|
94
|
+
output = self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
|
|
95
|
+
return ops.moveaxis(
|
|
96
|
+
output, (-1, 1), (self.feature_axis, self.sequence_axis)
|
|
97
|
+
)
|
|
90
98
|
|
|
91
99
|
def _apply_rotary_pos_emb(self, tensor, cos_emb, sin_emb):
|
|
92
|
-
x1, x2 = ops.split(tensor, 2, axis
|
|
93
|
-
|
|
100
|
+
x1, x2 = ops.split(tensor, 2, axis=-1)
|
|
101
|
+
# Avoid `ops.concatenate` for now, to avoid an obscure bug with XLA
|
|
102
|
+
# compilation on jax. We should be able to remove this once the
|
|
103
|
+
# following PR is in all jax releases we care about:
|
|
104
|
+
# https://github.com/openxla/xla/pull/7875
|
|
105
|
+
half_rot_tensor = ops.stack((-x2, x1), axis=-2)
|
|
106
|
+
half_rot_tensor = ops.reshape(half_rot_tensor, ops.shape(tensor))
|
|
94
107
|
return (tensor * cos_emb) + (half_rot_tensor * sin_emb)
|
|
95
108
|
|
|
96
109
|
def _compute_cos_sin_embedding(self, inputs, start_index=0):
|
|
97
|
-
|
|
98
|
-
return axis if axis > 0 else len(inputs.shape) + axis
|
|
110
|
+
start_index = ops.cast(start_index, dtype="float32")
|
|
99
111
|
|
|
100
|
-
feature_axis =
|
|
101
|
-
sequence_axis =
|
|
112
|
+
feature_axis = len(inputs.shape) - 1
|
|
113
|
+
sequence_axis = 1
|
|
102
114
|
|
|
103
115
|
rotary_dim = ops.shape(inputs)[feature_axis]
|
|
104
116
|
inverse_freq = self._get_inverse_freq(rotary_dim)
|
|
105
117
|
|
|
106
|
-
seq_len = ops.shape(inputs)[
|
|
107
|
-
tensor = ops.
|
|
118
|
+
seq_len = ops.shape(inputs)[sequence_axis]
|
|
119
|
+
tensor = ops.arange(seq_len, dtype="float32") + start_index
|
|
108
120
|
|
|
109
|
-
tensor = ops.cast(tensor, dtype=inverse_freq.dtype)
|
|
110
121
|
freq = ops.einsum("i,j->ij", tensor, inverse_freq)
|
|
111
|
-
embedding = ops.
|
|
122
|
+
embedding = ops.stack((freq, freq), axis=-2)
|
|
123
|
+
embedding = ops.reshape(
|
|
124
|
+
embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
|
|
125
|
+
)
|
|
112
126
|
|
|
113
127
|
# Reshape the embedding to be broadcastable with input shape.
|
|
114
128
|
if feature_axis < sequence_axis:
|
|
@@ -117,17 +131,16 @@ class RotaryEmbedding(keras.layers.Layer):
|
|
|
117
131
|
if axis != sequence_axis and axis != feature_axis:
|
|
118
132
|
embedding = ops.expand_dims(embedding, axis)
|
|
119
133
|
|
|
120
|
-
|
|
134
|
+
cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
|
|
135
|
+
sin_emb = ops.cast(ops.sin(embedding), self.compute_dtype)
|
|
136
|
+
return cos_emb, sin_emb
|
|
121
137
|
|
|
122
138
|
def _get_inverse_freq(self, rotary_dim):
|
|
123
|
-
freq_range = ops.arange(0, rotary_dim, 2)
|
|
124
|
-
freq_range = ops.cast(
|
|
125
|
-
freq_range = freq_range / ops.cast(
|
|
126
|
-
self.scaling_factor, self.compute_dtype
|
|
127
|
-
)
|
|
139
|
+
freq_range = ops.arange(0, rotary_dim, 2, dtype="float32")
|
|
140
|
+
freq_range = freq_range / ops.cast(self.scaling_factor, "float32")
|
|
128
141
|
inverse_freq = 1.0 / (
|
|
129
142
|
self.max_wavelength
|
|
130
|
-
** (freq_range / ops.cast(rotary_dim,
|
|
143
|
+
** (freq_range / ops.cast(rotary_dim, "float32"))
|
|
131
144
|
)
|
|
132
145
|
return inverse_freq
|
|
133
146
|
|
|
@@ -34,6 +34,8 @@ class SinePositionEncoding(keras.layers.Layer):
|
|
|
34
34
|
max_wavelength: The maximum angular wavelength of the sine/cosine
|
|
35
35
|
curves, as described in Attention is All You Need. Defaults to
|
|
36
36
|
`10000`.
|
|
37
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
38
|
+
including `name`, `trainable`, `dtype` etc.
|
|
37
39
|
|
|
38
40
|
Call arguments:
|
|
39
41
|
inputs: The tensor inputs to compute an embedding for, with shape
|
|
@@ -42,7 +44,7 @@ class SinePositionEncoding(keras.layers.Layer):
|
|
|
42
44
|
compute the encoding from. This is useful during cached decoding,
|
|
43
45
|
where each position is predicted separately in a loop.
|
|
44
46
|
|
|
45
|
-
|
|
47
|
+
Example:
|
|
46
48
|
```python
|
|
47
49
|
# create a simple embedding layer with sinusoidal positional encoding
|
|
48
50
|
seq_len = 100
|
|
@@ -33,6 +33,9 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
|
|
|
33
33
|
vocabulary_size: The size of the vocabulary.
|
|
34
34
|
sequence_length: The maximum length of input sequence
|
|
35
35
|
embedding_dim: The output dimension of the embedding layer
|
|
36
|
+
tie_weights: Boolean, whether or not the matrix for embedding and
|
|
37
|
+
the matrix for the `reverse` projection should share the same
|
|
38
|
+
weights.
|
|
36
39
|
embeddings_initializer: The initializer to use for the Embedding
|
|
37
40
|
Layers
|
|
38
41
|
mask_zero: Boolean, whether or not the input value 0 is a special
|
|
@@ -43,8 +46,10 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
|
|
|
43
46
|
If `mask_zero` is set to True, as a consequence, index 0 cannot be
|
|
44
47
|
used in the vocabulary
|
|
45
48
|
(input_dim should equal size of vocabulary + 1).
|
|
49
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
50
|
+
including `name`, `trainable`, `dtype` etc.
|
|
46
51
|
|
|
47
|
-
|
|
52
|
+
Example:
|
|
48
53
|
```python
|
|
49
54
|
inputs = np.ones(shape=(1, 50), dtype="int32")
|
|
50
55
|
embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_decoder.py
RENAMED
|
@@ -34,12 +34,9 @@ class TransformerDecoder(keras.layers.Layer):
|
|
|
34
34
|
paper [Attention is All You Need](https://arxiv.org/abs/1706.03762). Users
|
|
35
35
|
can instantiate multiple instances of this class to stack up a decoder.
|
|
36
36
|
|
|
37
|
-
By default, this layer will apply a causal mask to the decoder attention
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
`keras.layers.Embedding` layer). See the Masking and Padding
|
|
41
|
-
[guide](https://keras.io/guides/understanding_masking_and_padding/)
|
|
42
|
-
for more details.
|
|
37
|
+
By default, this layer will apply a causal mask to the decoder attention
|
|
38
|
+
layer. You can also pass padding or attention masks directly to the layer
|
|
39
|
+
during call, e.g. with `decoder_padding_mask` or `decoder_attention_mask`.
|
|
43
40
|
|
|
44
41
|
This layer can be called with either one or two inputs. The number of inputs
|
|
45
42
|
must be consistent across all calls. The options are as follows:
|
|
@@ -72,10 +69,10 @@ class TransformerDecoder(keras.layers.Layer):
|
|
|
72
69
|
(similar to GPT-2). If set to False, outputs of attention layer and
|
|
73
70
|
intermediate dense layer are normalized (similar to BERT).
|
|
74
71
|
Defaults to `False`.
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
73
|
+
including `name`, `trainable`, `dtype` etc.
|
|
77
74
|
|
|
78
|
-
|
|
75
|
+
Example:
|
|
79
76
|
```python
|
|
80
77
|
# Create a single transformer decoder layer.
|
|
81
78
|
decoder = keras_nlp.layers.TransformerDecoder(
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_encoder.py
RENAMED
|
@@ -58,10 +58,10 @@ class TransformerEncoder(keras.layers.Layer):
|
|
|
58
58
|
(similar to GPT-2). If set to False, outputs of attention layer and
|
|
59
59
|
intermediate dense layer are normalized (similar to BERT).
|
|
60
60
|
Defaults to `False`.
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
|
|
62
|
+
including `name`, `trainable`, `dtype` etc.
|
|
63
63
|
|
|
64
|
-
|
|
64
|
+
Example:
|
|
65
65
|
|
|
66
66
|
```python
|
|
67
67
|
# Create a single transformer encoder layer.
|
|
@@ -20,6 +20,7 @@ from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import (
|
|
|
20
20
|
)
|
|
21
21
|
from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
|
|
22
22
|
from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
|
|
23
|
+
from keras_nlp.src.models.backbone import Backbone
|
|
23
24
|
from keras_nlp.src.models.bart.bart_backbone import BartBackbone
|
|
24
25
|
from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
|
|
25
26
|
from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
|
|
@@ -36,7 +37,14 @@ from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import (
|
|
|
36
37
|
from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
|
|
37
38
|
from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
|
|
38
39
|
from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
|
|
40
|
+
from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
|
|
41
|
+
from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import (
|
|
42
|
+
BloomCausalLMPreprocessor,
|
|
43
|
+
)
|
|
44
|
+
from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
|
|
39
45
|
from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
|
|
46
|
+
from keras_nlp.src.models.causal_lm import CausalLM
|
|
47
|
+
from keras_nlp.src.models.classifier import Classifier
|
|
40
48
|
from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
|
|
41
49
|
from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import (
|
|
42
50
|
DebertaV3Classifier,
|
|
@@ -66,6 +74,7 @@ from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import (
|
|
|
66
74
|
DistilBertTokenizer,
|
|
67
75
|
)
|
|
68
76
|
from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
|
|
77
|
+
from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
|
|
69
78
|
from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
|
|
70
79
|
from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
|
|
71
80
|
from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
|
|
@@ -75,6 +84,8 @@ from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import (
|
|
|
75
84
|
)
|
|
76
85
|
from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
|
|
77
86
|
from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
|
|
87
|
+
from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
|
|
88
|
+
from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
|
|
78
89
|
from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
|
|
79
90
|
from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
|
|
80
91
|
from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import (
|
|
@@ -99,6 +110,13 @@ from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_preprocessor import (
|
|
|
99
110
|
)
|
|
100
111
|
from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
|
|
101
112
|
from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
|
|
113
|
+
from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
|
|
114
|
+
from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import (
|
|
115
|
+
LlamaCausalLMPreprocessor,
|
|
116
|
+
)
|
|
117
|
+
from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
|
|
118
|
+
from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
|
|
119
|
+
from keras_nlp.src.models.masked_lm import MaskedLM
|
|
102
120
|
from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
|
|
103
121
|
from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
|
|
104
122
|
from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import (
|
|
@@ -113,6 +131,7 @@ from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import (
|
|
|
113
131
|
)
|
|
114
132
|
from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
|
|
115
133
|
from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
|
|
134
|
+
from keras_nlp.src.models.preprocessor import Preprocessor
|
|
116
135
|
from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
|
|
117
136
|
from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
|
|
118
137
|
from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
|
|
@@ -121,8 +140,10 @@ from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import (
|
|
|
121
140
|
)
|
|
122
141
|
from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
|
|
123
142
|
from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
|
|
143
|
+
from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
|
|
124
144
|
from keras_nlp.src.models.t5.t5_backbone import T5Backbone
|
|
125
145
|
from keras_nlp.src.models.t5.t5_tokenizer import T5Tokenizer
|
|
146
|
+
from keras_nlp.src.models.task import Task
|
|
126
147
|
from keras_nlp.src.models.whisper.whisper_audio_feature_extractor import (
|
|
127
148
|
WhisperAudioFeatureExtractor,
|
|
128
149
|
)
|
|
@@ -146,4 +167,5 @@ from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import (
|
|
|
146
167
|
XLMRobertaTokenizer,
|
|
147
168
|
)
|
|
148
169
|
from keras_nlp.src.models.xlnet.xlnet_backbone import XLNetBackbone
|
|
170
|
+
from keras_nlp.src.tokenizers.tokenizer import Tokenizer
|
|
149
171
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2023 The KerasNLP Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
|
|
16
|
+
from keras_nlp.src.models.albert.albert_presets import backbone_presets
|
|
17
|
+
from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
|
|
18
|
+
from keras_nlp.src.utils.preset_utils import register_presets
|
|
19
|
+
|
|
20
|
+
register_presets(backbone_presets, (AlbertBackbone, AlbertTokenizer))
|
|
21
|
+
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_backbone.py
RENAMED
|
@@ -12,17 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import copy
|
|
16
|
-
|
|
17
15
|
from keras_nlp.src.api_export import keras_nlp_export
|
|
18
16
|
from keras_nlp.src.backend import keras
|
|
19
17
|
from keras_nlp.src.layers.modeling.position_embedding import PositionEmbedding
|
|
20
18
|
from keras_nlp.src.layers.modeling.reversible_embedding import ReversibleEmbedding
|
|
21
19
|
from keras_nlp.src.layers.modeling.transformer_encoder import TransformerEncoder
|
|
22
|
-
from keras_nlp.src.models.albert.albert_presets import backbone_presets
|
|
23
20
|
from keras_nlp.src.models.backbone import Backbone
|
|
24
21
|
from keras_nlp.src.utils.keras_utils import gelu_approximate
|
|
25
|
-
from keras_nlp.src.utils.python_utils import classproperty
|
|
26
22
|
|
|
27
23
|
|
|
28
24
|
def albert_kernel_initializer(stddev=0.02):
|
|
@@ -77,7 +73,7 @@ class AlbertBackbone(Backbone):
|
|
|
77
73
|
such as softmax and layer normalization, will always be done at
|
|
78
74
|
float32 precision regardless of dtype.
|
|
79
75
|
|
|
80
|
-
|
|
76
|
+
Example:
|
|
81
77
|
```python
|
|
82
78
|
input_data = {
|
|
83
79
|
"token_ids": np.ones(shape=(1, 12), dtype="int32"),
|
|
@@ -230,6 +226,7 @@ class AlbertBackbone(Backbone):
|
|
|
230
226
|
"sequence_output": sequence_output,
|
|
231
227
|
"pooled_output": pooled_output,
|
|
232
228
|
},
|
|
229
|
+
dtype=dtype,
|
|
233
230
|
**kwargs,
|
|
234
231
|
)
|
|
235
232
|
|
|
@@ -266,7 +263,3 @@ class AlbertBackbone(Backbone):
|
|
|
266
263
|
)
|
|
267
264
|
return config
|
|
268
265
|
|
|
269
|
-
@classproperty
|
|
270
|
-
def presets(cls):
|
|
271
|
-
return copy.deepcopy(backbone_presets)
|
|
272
|
-
|
{keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_classifier.py
RENAMED
|
@@ -12,20 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import copy
|
|
16
|
-
|
|
17
15
|
from keras_nlp.src.api_export import keras_nlp_export
|
|
18
16
|
from keras_nlp.src.backend import keras
|
|
19
17
|
from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
|
|
20
18
|
from keras_nlp.src.models.albert.albert_backbone import albert_kernel_initializer
|
|
21
19
|
from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
|
|
22
|
-
from keras_nlp.src.models.
|
|
23
|
-
from keras_nlp.src.models.task import Task
|
|
24
|
-
from keras_nlp.src.utils.python_utils import classproperty
|
|
20
|
+
from keras_nlp.src.models.classifier import Classifier
|
|
25
21
|
|
|
26
22
|
|
|
27
23
|
@keras_nlp_export("keras_nlp.models.AlbertClassifier")
|
|
28
|
-
class AlbertClassifier(
|
|
24
|
+
class AlbertClassifier(Classifier):
|
|
29
25
|
"""An end-to-end ALBERT model for classification tasks
|
|
30
26
|
|
|
31
27
|
This model attaches a classification head to a `keras_nlp.models.AlbertBackbone`
|
|
@@ -146,6 +142,9 @@ class AlbertClassifier(Task):
|
|
|
146
142
|
```
|
|
147
143
|
"""
|
|
148
144
|
|
|
145
|
+
backbone_cls = AlbertBackbone
|
|
146
|
+
preprocessor_cls = AlbertPreprocessor
|
|
147
|
+
|
|
149
148
|
def __init__(
|
|
150
149
|
self,
|
|
151
150
|
backbone,
|
|
@@ -187,17 +186,6 @@ class AlbertClassifier(Task):
|
|
|
187
186
|
self.activation = keras.activations.get(activation)
|
|
188
187
|
self.dropout = dropout
|
|
189
188
|
|
|
190
|
-
# === Default compilation ===
|
|
191
|
-
logit_output = self.activation == keras.activations.linear
|
|
192
|
-
self.compile(
|
|
193
|
-
loss=keras.losses.SparseCategoricalCrossentropy(
|
|
194
|
-
from_logits=logit_output
|
|
195
|
-
),
|
|
196
|
-
optimizer=keras.optimizers.Adam(5e-5),
|
|
197
|
-
metrics=[keras.metrics.SparseCategoricalAccuracy()],
|
|
198
|
-
jit_compile=True,
|
|
199
|
-
)
|
|
200
|
-
|
|
201
189
|
def get_config(self):
|
|
202
190
|
config = super().get_config()
|
|
203
191
|
config.update(
|
|
@@ -210,15 +198,3 @@ class AlbertClassifier(Task):
|
|
|
210
198
|
|
|
211
199
|
return config
|
|
212
200
|
|
|
213
|
-
@classproperty
|
|
214
|
-
def backbone_cls(cls):
|
|
215
|
-
return AlbertBackbone
|
|
216
|
-
|
|
217
|
-
@classproperty
|
|
218
|
-
def preprocessor_cls(cls):
|
|
219
|
-
return AlbertPreprocessor
|
|
220
|
-
|
|
221
|
-
@classproperty
|
|
222
|
-
def presets(cls):
|
|
223
|
-
return copy.deepcopy({**backbone_presets})
|
|
224
|
-
|