keras-nlp 0.8.2.dev0__tar.gz → 0.9.0.dev0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (267)
  1. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/PKG-INFO +1 -1
  2. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/__init__.py +1 -0
  3. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/models/__init__.py +24 -0
  4. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/__init__.py +1 -0
  5. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/conftest.py +18 -0
  6. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/alibi_bias.py +7 -2
  7. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/f_net_encoder.py +4 -5
  8. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/masked_lm_head.py +3 -1
  9. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/position_embedding.py +3 -1
  10. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/reversible_embedding.py +4 -2
  11. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/rotary_embedding.py +31 -18
  12. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/sine_position_encoding.py +3 -1
  13. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/token_and_position_embedding.py +6 -1
  14. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_decoder.py +6 -9
  15. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_encoder.py +3 -3
  16. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/__init__.py +22 -0
  17. keras-nlp-0.9.0.dev0/keras_nlp/src/models/albert/__init__.py +21 -0
  18. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_backbone.py +2 -9
  19. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_classifier.py +5 -29
  20. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_masked_lm.py +6 -27
  21. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_preprocessor.py +2 -12
  22. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_tokenizer.py +0 -8
  23. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/backbone.py +100 -42
  24. keras-nlp-0.9.0.dev0/keras_nlp/src/models/bart/__init__.py +21 -0
  25. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_backbone.py +1 -8
  26. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_preprocessor.py +2 -11
  27. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_seq_2_seq_lm.py +16 -35
  28. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +0 -7
  29. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_tokenizer.py +0 -7
  30. keras-nlp-0.9.0.dev0/keras_nlp/src/models/bert/__init__.py +24 -0
  31. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_backbone.py +1 -8
  32. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_classifier.py +5 -30
  33. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_masked_lm.py +6 -29
  34. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_preprocessor.py +2 -15
  35. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_tokenizer.py +15 -19
  36. keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/__init__.py +21 -0
  37. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_backbone.py +4 -16
  38. keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/bloom_causal_lm.py +298 -0
  39. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -6
  40. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_decoder.py +15 -19
  41. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_preprocessor.py +6 -15
  42. keras-nlp-0.9.0.dev0/keras_nlp/src/models/bloom/bloom_presets.py +122 -0
  43. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_tokenizer.py +11 -18
  44. keras-nlp-0.8.2.dev0/keras_nlp/src/models/generative_task.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/causal_lm.py +137 -28
  45. keras-nlp-0.9.0.dev0/keras_nlp/src/models/classifier.py +113 -0
  46. keras-nlp-0.9.0.dev0/keras_nlp/src/models/deberta_v3/__init__.py +21 -0
  47. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_backbone.py +1 -7
  48. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_classifier.py +5 -28
  49. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm.py +6 -26
  50. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_preprocessor.py +2 -11
  51. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_tokenizer.py +0 -7
  52. keras-nlp-0.9.0.dev0/keras_nlp/src/models/distil_bert/__init__.py +23 -0
  53. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_backbone.py +1 -7
  54. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_classifier.py +5 -28
  55. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_masked_lm.py +6 -26
  56. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_preprocessor.py +2 -11
  57. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_tokenizer.py +15 -15
  58. keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/__init__.py +21 -0
  59. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/electra/electra_backbone.py +14 -4
  60. keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/electra_preprocessor.py +154 -0
  61. keras-nlp-0.9.0.dev0/keras_nlp/src/models/electra/electra_presets.py +96 -0
  62. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/electra/electra_tokenizer.py +29 -13
  63. keras-nlp-0.9.0.dev0/keras_nlp/src/models/f_net/__init__.py +21 -0
  64. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_backbone.py +1 -7
  65. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_classifier.py +5 -28
  66. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_masked_lm.py +6 -26
  67. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_preprocessor.py +2 -11
  68. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_tokenizer.py +0 -7
  69. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/__init__.py +21 -0
  70. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_attention.py +157 -0
  71. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_backbone.py +162 -0
  72. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_causal_lm_preprocessor.py +179 -0
  73. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_preprocessor.py +187 -0
  74. keras-nlp-0.8.2.dev0/keras_nlp/src/models/bloom/bloom_presets.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_presets.py +10 -10
  75. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_tokenizer.py +111 -0
  76. keras-nlp-0.9.0.dev0/keras_nlp/src/models/falcon/falcon_transformer_decoder.py +255 -0
  77. keras-nlp-0.9.0.dev0/keras_nlp/src/models/gemma/__init__.py +21 -0
  78. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_attention.py +17 -26
  79. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_backbone.py +26 -17
  80. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_causal_lm.py +40 -35
  81. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_causal_lm_preprocessor.py +3 -6
  82. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_preprocessor.py +2 -11
  83. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_tokenizer.py +0 -7
  84. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/rms_normalization.py +1 -1
  85. keras-nlp-0.9.0.dev0/keras_nlp/src/models/gpt2/__init__.py +21 -0
  86. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_backbone.py +1 -7
  87. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_causal_lm.py +147 -35
  88. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_causal_lm_preprocessor.py +2 -2
  89. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_preprocessor.py +2 -11
  90. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_tokenizer.py +0 -7
  91. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_backbone.py +1 -0
  92. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +16 -29
  93. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +2 -2
  94. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +2 -5
  95. keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/__init__.py +21 -0
  96. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_attention.py +76 -62
  97. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_backbone.py +64 -38
  98. keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_causal_lm.py +334 -0
  99. keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_causal_lm_preprocessor.py +186 -0
  100. keras-nlp-0.8.2.dev0/keras_nlp/src/models/mistral/mistral_transformer_decoder.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_decoder.py +19 -23
  101. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_layernorm.py +21 -10
  102. keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_preprocessor.py +189 -0
  103. keras-nlp-0.9.0.dev0/keras_nlp/src/models/llama/llama_presets.py +39 -0
  104. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/llama/llama_tokenizer.py +8 -0
  105. keras-nlp-0.9.0.dev0/keras_nlp/src/models/masked_lm.py +105 -0
  106. keras-nlp-0.9.0.dev0/keras_nlp/src/models/mistral/__init__.py +21 -0
  107. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_attention.py +0 -5
  108. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_backbone.py +2 -7
  109. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_causal_lm.py +141 -34
  110. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_causal_lm_preprocessor.py +2 -2
  111. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_layer_norm.py +11 -10
  112. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_preprocessor.py +2 -11
  113. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_presets.py +10 -0
  114. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/mistral/mistral_tokenizer.py +1 -8
  115. keras-nlp-0.8.2.dev0/keras_nlp/src/models/llama/llama_decoder.py → keras-nlp-0.9.0.dev0/keras_nlp/src/models/mistral/mistral_transformer_decoder.py +81 -33
  116. keras-nlp-0.9.0.dev0/keras_nlp/src/models/opt/__init__.py +21 -0
  117. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_backbone.py +1 -7
  118. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_causal_lm.py +16 -36
  119. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_causal_lm_preprocessor.py +2 -2
  120. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_preprocessor.py +2 -11
  121. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_tokenizer.py +0 -7
  122. keras-nlp-0.9.0.dev0/keras_nlp/src/models/preprocessor.py +156 -0
  123. keras-nlp-0.9.0.dev0/keras_nlp/src/models/roberta/__init__.py +21 -0
  124. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_backbone.py +1 -7
  125. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_classifier.py +5 -28
  126. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_masked_lm.py +5 -25
  127. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_preprocessor.py +2 -11
  128. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_tokenizer.py +0 -7
  129. keras-nlp-0.9.0.dev0/keras_nlp/src/models/seq_2_seq_lm.py +55 -0
  130. keras-nlp-0.9.0.dev0/keras_nlp/src/models/t5/__init__.py +21 -0
  131. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_backbone.py +1 -7
  132. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_tokenizer.py +0 -7
  133. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/task.py +107 -104
  134. keras-nlp-0.9.0.dev0/keras_nlp/src/models/whisper/__init__.py +21 -0
  135. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_audio_feature_extractor.py +0 -53
  136. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_backbone.py +1 -7
  137. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_preprocessor.py +2 -15
  138. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_tokenizer.py +0 -7
  139. keras-nlp-0.9.0.dev0/keras_nlp/src/models/xlm_roberta/__init__.py +23 -0
  140. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_backbone.py +0 -7
  141. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_classifier.py +5 -28
  142. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_masked_lm.py +6 -26
  143. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_preprocessor.py +2 -11
  144. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_tokenizer.py +0 -7
  145. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_backbone.py +2 -1
  146. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/beam_sampler.py +5 -4
  147. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/contrastive_sampler.py +5 -5
  148. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/sampler.py +6 -5
  149. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tests/test_case.py +4 -6
  150. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/byte_pair_tokenizer.py +12 -75
  151. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/byte_tokenizer.py +7 -7
  152. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/sentence_piece_tokenizer.py +10 -73
  153. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/tokenizer.py +95 -10
  154. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/unicode_codepoint_tokenizer.py +9 -9
  155. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/word_piece_tokenizer.py +97 -82
  156. keras-nlp-0.9.0.dev0/keras_nlp/src/utils/preset_utils.py +412 -0
  157. keras-nlp-0.9.0.dev0/keras_nlp/src/utils/python_utils.py +22 -0
  158. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/tensor_utils.py +27 -1
  159. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/version_utils.py +1 -1
  160. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/PKG-INFO +1 -1
  161. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/SOURCES.txt +19 -1
  162. keras-nlp-0.8.2.dev0/keras_nlp/src/models/bart/__init__.py +0 -14
  163. keras-nlp-0.8.2.dev0/keras_nlp/src/models/distil_bert/__init__.py +0 -14
  164. keras-nlp-0.8.2.dev0/keras_nlp/src/models/electra/__init__.py +0 -14
  165. keras-nlp-0.8.2.dev0/keras_nlp/src/models/f_net/__init__.py +0 -14
  166. keras-nlp-0.8.2.dev0/keras_nlp/src/models/gemma/__init__.py +0 -14
  167. keras-nlp-0.8.2.dev0/keras_nlp/src/models/gpt2/__init__.py +0 -14
  168. keras-nlp-0.8.2.dev0/keras_nlp/src/models/gpt_neo_x/__init__.py +0 -14
  169. keras-nlp-0.8.2.dev0/keras_nlp/src/models/llama/__init__.py +0 -14
  170. keras-nlp-0.8.2.dev0/keras_nlp/src/models/mistral/__init__.py +0 -14
  171. keras-nlp-0.8.2.dev0/keras_nlp/src/models/opt/__init__.py +0 -14
  172. keras-nlp-0.8.2.dev0/keras_nlp/src/models/preprocessor.py +0 -120
  173. keras-nlp-0.8.2.dev0/keras_nlp/src/models/roberta/__init__.py +0 -14
  174. keras-nlp-0.8.2.dev0/keras_nlp/src/models/t5/__init__.py +0 -14
  175. keras-nlp-0.8.2.dev0/keras_nlp/src/models/whisper/__init__.py +0 -14
  176. keras-nlp-0.8.2.dev0/keras_nlp/src/models/xlm_roberta/__init__.py +0 -14
  177. keras-nlp-0.8.2.dev0/keras_nlp/src/models/xlnet/__init__.py +0 -14
  178. keras-nlp-0.8.2.dev0/keras_nlp/src/tests/__init__.py +0 -14
  179. keras-nlp-0.8.2.dev0/keras_nlp/src/utils/__init__.py +0 -14
  180. keras-nlp-0.8.2.dev0/keras_nlp/src/utils/preset_utils.py +0 -219
  181. keras-nlp-0.8.2.dev0/keras_nlp/src/utils/python_utils.py +0 -47
  182. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/README.md +0 -0
  183. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/layers/__init__.py +0 -0
  184. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/metrics/__init__.py +0 -0
  185. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/samplers/__init__.py +0 -0
  186. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/api_export.py +0 -0
  187. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/__init__.py +0 -0
  188. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/config.py +0 -0
  189. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/keras.py +0 -0
  190. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/ops.py +0 -0
  191. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/backend/random.py +0 -0
  192. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/__init__.py +0 -0
  193. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/__init__.py +0 -0
  194. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/cached_multi_head_attention.py +0 -0
  195. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/modeling/transformer_layer_utils.py +0 -0
  196. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/__init__.py +0 -0
  197. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/masked_lm_mask_generator.py +0 -0
  198. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/multi_segment_packer.py +0 -0
  199. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/preprocessing_layer.py +0 -0
  200. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/random_deletion.py +0 -0
  201. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/random_swap.py +0 -0
  202. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/layers/preprocessing/start_end_packer.py +0 -0
  203. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/__init__.py +0 -0
  204. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/bleu.py +0 -0
  205. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/edit_distance.py +0 -0
  206. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/perplexity.py +0 -0
  207. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_base.py +0 -0
  208. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_l.py +0 -0
  209. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/metrics/rouge_n.py +0 -0
  210. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_masked_lm_preprocessor.py +0 -0
  211. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/albert/albert_presets.py +0 -0
  212. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bart/bart_presets.py +0 -0
  213. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_masked_lm_preprocessor.py +0 -0
  214. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bert/bert_presets.py +0 -0
  215. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/bloom/bloom_attention.py +0 -0
  216. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +0 -0
  217. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/deberta_v3_presets.py +0 -0
  218. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/disentangled_attention_encoder.py +0 -0
  219. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/disentangled_self_attention.py +0 -0
  220. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/deberta_v3/relative_embedding.py +0 -0
  221. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +0 -0
  222. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/distil_bert/distil_bert_presets.py +0 -0
  223. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_masked_lm_preprocessor.py +0 -0
  224. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/f_net/f_net_presets.py +0 -0
  225. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_decoder_block.py +0 -0
  226. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gemma/gemma_presets.py +0 -0
  227. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt2/gpt2_presets.py +0 -0
  228. {keras-nlp-0.8.2.dev0/keras_nlp/src/models/albert → keras-nlp-0.9.0.dev0/keras_nlp/src/models/gpt_neo_x}/__init__.py +0 -0
  229. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_attention.py +0 -0
  230. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_decoder.py +0 -0
  231. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +0 -0
  232. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/opt/opt_presets.py +0 -0
  233. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_masked_lm_preprocessor.py +0 -0
  234. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/roberta/roberta_presets.py +0 -0
  235. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_layer_norm.py +0 -0
  236. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_multi_head_attention.py +0 -0
  237. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_presets.py +0 -0
  238. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/t5/t5_transformer_layer.py +0 -0
  239. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_cached_multi_head_attention.py +0 -0
  240. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_decoder.py +0 -0
  241. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_encoder.py +0 -0
  242. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/whisper/whisper_presets.py +0 -0
  243. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +0 -0
  244. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlm_roberta/xlm_roberta_presets.py +0 -0
  245. {keras-nlp-0.8.2.dev0/keras_nlp/src/models/bert → keras-nlp-0.9.0.dev0/keras_nlp/src/models/xlnet}/__init__.py +0 -0
  246. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/relative_attention.py +0 -0
  247. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_content_and_query_embedding.py +0 -0
  248. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/models/xlnet/xlnet_encoder.py +0 -0
  249. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/__init__.py +0 -0
  250. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/greedy_sampler.py +0 -0
  251. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/random_sampler.py +0 -0
  252. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/serialization.py +0 -0
  253. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/top_k_sampler.py +0 -0
  254. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/samplers/top_p_sampler.py +0 -0
  255. {keras-nlp-0.8.2.dev0/keras_nlp/src/models/bloom → keras-nlp-0.9.0.dev0/keras_nlp/src/tests}/__init__.py +0 -0
  256. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/__init__.py +0 -0
  257. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/sentence_piece_tokenizer_trainer.py +0 -0
  258. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/tokenizers/word_piece_tokenizer_trainer.py +0 -0
  259. {keras-nlp-0.8.2.dev0/keras_nlp/src/models/deberta_v3 → keras-nlp-0.9.0.dev0/keras_nlp/src/utils}/__init__.py +0 -0
  260. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/keras_utils.py +0 -0
  261. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/src/utils/pipeline_model.py +0 -0
  262. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp/tokenizers/__init__.py +0 -0
  263. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/dependency_links.txt +0 -0
  264. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/requires.txt +0 -0
  265. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/keras_nlp.egg-info/top_level.txt +0 -0
  266. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/setup.cfg +0 -0
  267. {keras-nlp-0.8.2.dev0 → keras-nlp-0.9.0.dev0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: keras-nlp
3
- Version: 0.8.2.dev0
3
+ Version: 0.9.0.dev0
4
4
  Summary: Industry-strength Natural Language Processing extensions for Keras.
5
5
  Home-page: https://github.com/keras-team/keras-nlp
6
6
  Author: Keras team
@@ -10,5 +10,6 @@ from keras_nlp import metrics
10
10
  from keras_nlp import models
11
11
  from keras_nlp import samplers
12
12
  from keras_nlp import tokenizers
13
+ from keras_nlp.src.utils.preset_utils import upload_preset
13
14
  from keras_nlp.src.version_utils import version
14
15
  from keras_nlp.src.version_utils import __version__
@@ -11,6 +11,7 @@ from keras_nlp.src.models.albert.albert_masked_lm import AlbertMaskedLM
11
11
  from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import AlbertMaskedLMPreprocessor
12
12
  from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
13
13
  from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
14
+ from keras_nlp.src.models.backbone import Backbone
14
15
  from keras_nlp.src.models.bart.bart_backbone import BartBackbone
15
16
  from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
16
17
  from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
@@ -22,6 +23,13 @@ from keras_nlp.src.models.bert.bert_masked_lm import BertMaskedLM
22
23
  from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import BertMaskedLMPreprocessor
23
24
  from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
24
25
  from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
26
+ from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
27
+ from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
28
+ from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import BloomCausalLMPreprocessor
29
+ from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
30
+ from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
31
+ from keras_nlp.src.models.causal_lm import CausalLM
32
+ from keras_nlp.src.models.classifier import Classifier
25
33
  from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
26
34
  from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import DebertaV3Classifier
27
35
  from keras_nlp.src.models.deberta_v3.deberta_v3_masked_lm import DebertaV3MaskedLM
@@ -34,12 +42,19 @@ from keras_nlp.src.models.distil_bert.distil_bert_masked_lm import DistilBertMas
34
42
  from keras_nlp.src.models.distil_bert.distil_bert_masked_lm_preprocessor import DistilBertMaskedLMPreprocessor
35
43
  from keras_nlp.src.models.distil_bert.distil_bert_preprocessor import DistilBertPreprocessor
36
44
  from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import DistilBertTokenizer
45
+ from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
46
+ from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
47
+ from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
37
48
  from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
38
49
  from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
39
50
  from keras_nlp.src.models.f_net.f_net_masked_lm import FNetMaskedLM
40
51
  from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import FNetMaskedLMPreprocessor
41
52
  from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
42
53
  from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
54
+ from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
55
+ from keras_nlp.src.models.falcon.falcon_causal_lm_preprocessor import FalconCausalLMPreprocessor
56
+ from keras_nlp.src.models.falcon.falcon_preprocessor import FalconPreprocessor
57
+ from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
43
58
  from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
44
59
  from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
45
60
  from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import GemmaCausalLMPreprocessor
@@ -50,7 +65,12 @@ from keras_nlp.src.models.gpt2.gpt2_causal_lm import GPT2CausalLM
50
65
  from keras_nlp.src.models.gpt2.gpt2_causal_lm_preprocessor import GPT2CausalLMPreprocessor
51
66
  from keras_nlp.src.models.gpt2.gpt2_preprocessor import GPT2Preprocessor
52
67
  from keras_nlp.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
68
+ from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
69
+ from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
70
+ from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import LlamaCausalLMPreprocessor
71
+ from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
53
72
  from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
73
+ from keras_nlp.src.models.masked_lm import MaskedLM
54
74
  from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
55
75
  from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
56
76
  from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import MistralCausalLMPreprocessor
@@ -61,15 +81,19 @@ from keras_nlp.src.models.opt.opt_causal_lm import OPTCausalLM
61
81
  from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import OPTCausalLMPreprocessor
62
82
  from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
63
83
  from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
84
+ from keras_nlp.src.models.preprocessor import Preprocessor
64
85
  from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
65
86
  from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
66
87
  from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
67
88
  from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import RobertaMaskedLMPreprocessor
68
89
  from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
69
90
  from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
91
+ from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
92
+ from keras_nlp.src.models.task import Task
70
93
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_backbone import XLMRobertaBackbone
71
94
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_classifier import XLMRobertaClassifier
72
95
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm import XLMRobertaMaskedLM
73
96
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_masked_lm_preprocessor import XLMRobertaMaskedLMPreprocessor
74
97
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_preprocessor import XLMRobertaPreprocessor
75
98
  from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import XLMRobertaTokenizer
99
+ from keras_nlp.src.tokenizers.tokenizer import Tokenizer
@@ -26,6 +26,7 @@ from keras_nlp.src import models
26
26
  from keras_nlp.src import samplers
27
27
  from keras_nlp.src import tokenizers
28
28
  from keras_nlp.src import utils
29
+ from keras_nlp.src.utils.preset_utils import upload_preset
29
30
  from keras_nlp.src.version_utils import __version__
30
31
  from keras_nlp.src.version_utils import version
31
32
 
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import os
16
+
15
17
  import pytest
16
18
  import tensorflow as tf
17
19
 
@@ -83,6 +85,10 @@ def pytest_configure(config):
83
85
  "markers",
84
86
  "keras_3_only: mark test as a keras 3 only test",
85
87
  )
88
+ config.addinivalue_line(
89
+ "markers",
90
+ "kaggle_key_required: mark test needing a kaggle key",
91
+ )
86
92
 
87
93
 
88
94
  def pytest_collection_modifyitems(config, items):
@@ -107,6 +113,16 @@ def pytest_collection_modifyitems(config, items):
107
113
  not backend_config.keras_3(),
108
114
  reason="tests only run on with multi-backend keras",
109
115
  )
116
+ found_kaggle_key = all(
117
+ [
118
+ os.environ.get("KAGGLE_USERNAME", None),
119
+ os.environ.get("KAGGLE_KEY", None),
120
+ ]
121
+ )
122
+ kaggle_key_required = pytest.mark.skipif(
123
+ not found_kaggle_key,
124
+ reason="tests only run with a kaggle api key",
125
+ )
110
126
  for item in items:
111
127
  if "large" in item.keywords:
112
128
  item.add_marker(skip_large)
@@ -116,6 +132,8 @@ def pytest_collection_modifyitems(config, items):
116
132
  item.add_marker(tf_only)
117
133
  if "keras_3_only" in item.keywords:
118
134
  item.add_marker(keras_3_only)
135
+ if "kaggle_key_required" in item.keywords:
136
+ item.add_marker(kaggle_key_required)
119
137
 
120
138
 
121
139
  # Disable traceback filtering for quicker debugging of tests failures.
@@ -35,12 +35,15 @@ class AlibiBias(keras.layers.Layer):
35
35
  each head. The heads' slopes are a geometric sequence that starts at
36
36
  `2**(-alibi_bias_max/num_heads)` and uses that same value as its
37
37
  ratio. Defaults to 8.
38
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
39
+ including `name`, `trainable`, `dtype` etc.
40
+
38
41
  Call arguments:
39
42
  attention_scores: The result of multiplying the query and the key of the
40
43
  multi-head attention layer of the transformer to add alibi bias to
41
44
  it. With shape `(batch_size, num_heads, query_length, key_length)`.
42
45
 
43
- Examples:
46
+ Example:
44
47
  ```python
45
48
  query_length = 10
46
49
  key_length = 10
@@ -94,7 +97,9 @@ class AlibiBias(keras.layers.Layer):
94
97
  )
95
98
  slopes = ops.expand_dims(slopes, 1)
96
99
 
97
- seq_range = ops.expand_dims(ops.arange(1 - key_length, 1), 0)
100
+ seq_range = ops.expand_dims(
101
+ ops.arange(1 - key_length, 1, dtype="int32"), 0
102
+ )
98
103
  seq_range = ops.cast(seq_range, dtype=self.compute_dtype)
99
104
 
100
105
  alibi_bias = ops.multiply(slopes, seq_range)
@@ -47,10 +47,10 @@ class FNetEncoder(keras.layers.Layer):
47
47
  bias_initializer: "string" or `keras.initializers` initializer.
48
48
  The bias initializer for the dense layers.
49
49
  Defaults to `"zeros"`.
50
- name: string. The name of the layer. Defaults to `None`.
51
- **kwargs: other keyword arguments.
50
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
51
+ including `name`, `trainable`, `dtype` etc.
52
52
 
53
- Examples:
53
+ Example:
54
54
 
55
55
  ```python
56
56
  # Create a single FNet encoder layer.
@@ -79,10 +79,9 @@ class FNetEncoder(keras.layers.Layer):
79
79
  layer_norm_epsilon=1e-5,
80
80
  kernel_initializer="glorot_uniform",
81
81
  bias_initializer="zeros",
82
- name=None,
83
82
  **kwargs
84
83
  ):
85
- super().__init__(name=name, **kwargs)
84
+ super().__init__(**kwargs)
86
85
  self.intermediate_dim = intermediate_dim
87
86
  self.dropout = dropout
88
87
  self.activation = keras.activations.get(activation)
@@ -59,8 +59,10 @@ class MaskedLMHead(keras.layers.Layer):
59
59
  bias_initializer: string or `keras.initializers` initializer.
60
60
  The bias initializer for the dense and multiheaded
61
61
  attention layers. Defaults to `"zeros"`.
62
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
63
+ including `name`, `trainable`, `dtype` etc.
62
64
 
63
- Examples:
65
+ Example:
64
66
 
65
67
  ```python
66
68
  batch_size = 16
@@ -33,6 +33,8 @@ class PositionEmbedding(keras.layers.Layer):
33
33
  initializer: The initializer to use for the embedding weights. Defaults
34
34
  to `"glorot_uniform"`.
35
35
  seq_axis: The axis of the input tensor where we add the embeddings.
36
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
37
+ including `name`, `trainable`, `dtype` etc.
36
38
 
37
39
  Call arguments:
38
40
  inputs: The tensor inputs to compute an embedding for, with shape
@@ -43,7 +45,7 @@ class PositionEmbedding(keras.layers.Layer):
43
45
  compute the position embedding from. This is useful during cached
44
46
  decoding, where each position is predicted separately in a loop.
45
47
 
46
- Examples:
48
+ Example:
47
49
 
48
50
  Called directly on input.
49
51
  >>> layer = keras_nlp.layers.PositionEmbedding(sequence_length=10)
@@ -52,6 +52,8 @@ class ReversibleEmbedding(keras.layers.Embedding):
52
52
  reverse_dtype: The dtype for the reverse projection computation.
53
53
  For stability, it is usually best to use full precision even when
54
54
  working with half or mixed precision training.
55
+ **kwargs: other keyword arguments passed to `keras.layers.Embedding`,
56
+ including `name`, `trainable`, `dtype` etc.
55
57
 
56
58
  Call arguments:
57
59
  inputs: The tensor inputs to the layer.
@@ -59,7 +61,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
59
61
  from `output_dim` to `input_dim`, instead of a normal embedding
60
62
  call. Defaults to `False`.
61
63
 
62
- Examples:
64
+ Example:
63
65
  ```python
64
66
  batch_size = 16
65
67
  vocab_size = 100
@@ -73,7 +75,7 @@ class ReversibleEmbedding(keras.layers.Embedding):
73
75
  # Embed tokens to shape `(batch_size, seq_length, hidden_dim)`.
74
76
  hidden_states = embedding(token_ids)
75
77
  # Project hidden states to shape `(batch_size, seq_length, vocab_size)`.
76
- logits = embedding(hidden_state, reverse=True)
78
+ logits = embedding(hidden_states, reverse=True)
77
79
  ```
78
80
 
79
81
  References:
@@ -38,6 +38,8 @@ class RotaryEmbedding(keras.layers.Layer):
38
38
  scaling_factor: float. The scaling factor used to scale frequency range.
39
39
  sequence_axis: int. Sequence axis in the input tensor.
40
40
  feature_axis: int. Feature axis in the input tensor.
41
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
42
+ including `name`, `trainable`, `dtype` etc.
41
43
 
42
44
  Call arguments:
43
45
  inputs: The tensor inputs to apply the embedding to. This can have
@@ -85,30 +87,42 @@ class RotaryEmbedding(keras.layers.Layer):
85
87
  self.built = True
86
88
 
87
89
  def call(self, inputs, start_index=0):
90
+ inputs = ops.moveaxis(
91
+ inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
92
+ )
88
93
  cos_emb, sin_emb = self._compute_cos_sin_embedding(inputs, start_index)
89
- return self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
94
+ output = self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
95
+ return ops.moveaxis(
96
+ output, (-1, 1), (self.feature_axis, self.sequence_axis)
97
+ )
90
98
 
91
99
  def _apply_rotary_pos_emb(self, tensor, cos_emb, sin_emb):
92
- x1, x2 = ops.split(tensor, 2, axis=self.feature_axis)
93
- half_rot_tensor = ops.concatenate((-x2, x1), axis=self.feature_axis)
100
+ x1, x2 = ops.split(tensor, 2, axis=-1)
101
+ # Avoid `ops.concatenate` for now, to avoid an obscure bug with XLA
102
+ # compilation on jax. We should be able to remove this once the
103
+ # following PR is in all jax releases we care about:
104
+ # https://github.com/openxla/xla/pull/7875
105
+ half_rot_tensor = ops.stack((-x2, x1), axis=-2)
106
+ half_rot_tensor = ops.reshape(half_rot_tensor, ops.shape(tensor))
94
107
  return (tensor * cos_emb) + (half_rot_tensor * sin_emb)
95
108
 
96
109
  def _compute_cos_sin_embedding(self, inputs, start_index=0):
97
- def get_axis(axis):
98
- return axis if axis > 0 else len(inputs.shape) + axis
110
+ start_index = ops.cast(start_index, dtype="float32")
99
111
 
100
- feature_axis = get_axis(self.feature_axis)
101
- sequence_axis = get_axis(self.sequence_axis)
112
+ feature_axis = len(inputs.shape) - 1
113
+ sequence_axis = 1
102
114
 
103
115
  rotary_dim = ops.shape(inputs)[feature_axis]
104
116
  inverse_freq = self._get_inverse_freq(rotary_dim)
105
117
 
106
- seq_len = ops.shape(inputs)[self.sequence_axis]
107
- tensor = ops.cast(ops.arange(seq_len), self.compute_dtype) + start_index
118
+ seq_len = ops.shape(inputs)[sequence_axis]
119
+ tensor = ops.arange(seq_len, dtype="float32") + start_index
108
120
 
109
- tensor = ops.cast(tensor, dtype=inverse_freq.dtype)
110
121
  freq = ops.einsum("i,j->ij", tensor, inverse_freq)
111
- embedding = ops.concatenate((freq, freq), axis=-1)
122
+ embedding = ops.stack((freq, freq), axis=-2)
123
+ embedding = ops.reshape(
124
+ embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
125
+ )
112
126
 
113
127
  # Reshape the embedding to be broadcastable with input shape.
114
128
  if feature_axis < sequence_axis:
@@ -117,17 +131,16 @@ class RotaryEmbedding(keras.layers.Layer):
117
131
  if axis != sequence_axis and axis != feature_axis:
118
132
  embedding = ops.expand_dims(embedding, axis)
119
133
 
120
- return ops.cos(embedding), ops.sin(embedding)
134
+ cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
135
+ sin_emb = ops.cast(ops.sin(embedding), self.compute_dtype)
136
+ return cos_emb, sin_emb
121
137
 
122
138
  def _get_inverse_freq(self, rotary_dim):
123
- freq_range = ops.arange(0, rotary_dim, 2)
124
- freq_range = ops.cast(freq_range, self.compute_dtype)
125
- freq_range = freq_range / ops.cast(
126
- self.scaling_factor, self.compute_dtype
127
- )
139
+ freq_range = ops.arange(0, rotary_dim, 2, dtype="float32")
140
+ freq_range = freq_range / ops.cast(self.scaling_factor, "float32")
128
141
  inverse_freq = 1.0 / (
129
142
  self.max_wavelength
130
- ** (freq_range / ops.cast(rotary_dim, self.compute_dtype))
143
+ ** (freq_range / ops.cast(rotary_dim, "float32"))
131
144
  )
132
145
  return inverse_freq
133
146
 
@@ -34,6 +34,8 @@ class SinePositionEncoding(keras.layers.Layer):
34
34
  max_wavelength: The maximum angular wavelength of the sine/cosine
35
35
  curves, as described in Attention is All You Need. Defaults to
36
36
  `10000`.
37
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
38
+ including `name`, `trainable`, `dtype` etc.
37
39
 
38
40
  Call arguments:
39
41
  inputs: The tensor inputs to compute an embedding for, with shape
@@ -42,7 +44,7 @@ class SinePositionEncoding(keras.layers.Layer):
42
44
  compute the encoding from. This is useful during cached decoding,
43
45
  where each position is predicted separately in a loop.
44
46
 
45
- Examples:
47
+ Example:
46
48
  ```python
47
49
  # create a simple embedding layer with sinusoidal positional encoding
48
50
  seq_len = 100
@@ -33,6 +33,9 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
33
33
  vocabulary_size: The size of the vocabulary.
34
34
  sequence_length: The maximum length of input sequence
35
35
  embedding_dim: The output dimension of the embedding layer
36
+ tie_weights: Boolean, whether or not the matrix for embedding and
37
+ the matrix for the `reverse` projection should share the same
38
+ weights.
36
39
  embeddings_initializer: The initializer to use for the Embedding
37
40
  Layers
38
41
  mask_zero: Boolean, whether or not the input value 0 is a special
@@ -43,8 +46,10 @@ class TokenAndPositionEmbedding(keras.layers.Layer):
43
46
  If `mask_zero` is set to True, as a consequence, index 0 cannot be
44
47
  used in the vocabulary
45
48
  (input_dim should equal size of vocabulary + 1).
49
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
50
+ including `name`, `trainable`, `dtype` etc.
46
51
 
47
- Examples:
52
+ Example:
48
53
  ```python
49
54
  inputs = np.ones(shape=(1, 50), dtype="int32")
50
55
  embedding_layer = keras_nlp.layers.TokenAndPositionEmbedding(
@@ -34,12 +34,9 @@ class TransformerDecoder(keras.layers.Layer):
34
34
  paper [Attention is All You Need](https://arxiv.org/abs/1706.03762). Users
35
35
  can instantiate multiple instances of this class to stack up a decoder.
36
36
 
37
- By default, this layer will apply a causal mask to the decoder attention layer.
38
- This layer will correctly compute an attention mask from an implicit
39
- Keras padding mask (for example, by passing `mask_zero=True` to a
40
- `keras.layers.Embedding` layer). See the Masking and Padding
41
- [guide](https://keras.io/guides/understanding_masking_and_padding/)
42
- for more details.
37
+ By default, this layer will apply a causal mask to the decoder attention
38
+ layer. You can also pass padding or attention masks directly to the layer
39
+ during call, e.g. with `decoder_padding_mask` or `decoder_attention_mask`.
43
40
 
44
41
  This layer can be called with either one or two inputs. The number of inputs
45
42
  must be consistent across all calls. The options are as follows:
@@ -72,10 +69,10 @@ class TransformerDecoder(keras.layers.Layer):
72
69
  (similar to GPT-2). If set to False, outputs of attention layer and
73
70
  intermediate dense layer are normalized (similar to BERT).
74
71
  Defaults to `False`.
75
- name: string. The name of the layer. Defaults to `None`.
76
- **kwargs: other keyword arguments.
72
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
73
+ including `name`, `trainable`, `dtype` etc.
77
74
 
78
- Examples:
75
+ Example:
79
76
  ```python
80
77
  # Create a single transformer decoder layer.
81
78
  decoder = keras_nlp.layers.TransformerDecoder(
@@ -58,10 +58,10 @@ class TransformerEncoder(keras.layers.Layer):
58
58
  (similar to GPT-2). If set to False, outputs of attention layer and
59
59
  intermediate dense layer are normalized (similar to BERT).
60
60
  Defaults to `False`.
61
- name: string. The name of the layer. Defaults to `None`.
62
- **kwargs: other keyword arguments.
61
+ **kwargs: other keyword arguments passed to `keras.layers.Layer`,
62
+ including `name`, `trainable`, `dtype` etc.
63
63
 
64
- Examples:
64
+ Example:
65
65
 
66
66
  ```python
67
67
  # Create a single transformer encoder layer.
@@ -20,6 +20,7 @@ from keras_nlp.src.models.albert.albert_masked_lm_preprocessor import (
20
20
  )
21
21
  from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
22
22
  from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
23
+ from keras_nlp.src.models.backbone import Backbone
23
24
  from keras_nlp.src.models.bart.bart_backbone import BartBackbone
24
25
  from keras_nlp.src.models.bart.bart_preprocessor import BartPreprocessor
25
26
  from keras_nlp.src.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
@@ -36,7 +37,14 @@ from keras_nlp.src.models.bert.bert_masked_lm_preprocessor import (
36
37
  from keras_nlp.src.models.bert.bert_preprocessor import BertPreprocessor
37
38
  from keras_nlp.src.models.bert.bert_tokenizer import BertTokenizer
38
39
  from keras_nlp.src.models.bloom.bloom_backbone import BloomBackbone
40
+ from keras_nlp.src.models.bloom.bloom_causal_lm import BloomCausalLM
41
+ from keras_nlp.src.models.bloom.bloom_causal_lm_preprocessor import (
42
+ BloomCausalLMPreprocessor,
43
+ )
44
+ from keras_nlp.src.models.bloom.bloom_preprocessor import BloomPreprocessor
39
45
  from keras_nlp.src.models.bloom.bloom_tokenizer import BloomTokenizer
46
+ from keras_nlp.src.models.causal_lm import CausalLM
47
+ from keras_nlp.src.models.classifier import Classifier
40
48
  from keras_nlp.src.models.deberta_v3.deberta_v3_backbone import DebertaV3Backbone
41
49
  from keras_nlp.src.models.deberta_v3.deberta_v3_classifier import (
42
50
  DebertaV3Classifier,
@@ -66,6 +74,7 @@ from keras_nlp.src.models.distil_bert.distil_bert_tokenizer import (
66
74
  DistilBertTokenizer,
67
75
  )
68
76
  from keras_nlp.src.models.electra.electra_backbone import ElectraBackbone
77
+ from keras_nlp.src.models.electra.electra_preprocessor import ElectraPreprocessor
69
78
  from keras_nlp.src.models.electra.electra_tokenizer import ElectraTokenizer
70
79
  from keras_nlp.src.models.f_net.f_net_backbone import FNetBackbone
71
80
  from keras_nlp.src.models.f_net.f_net_classifier import FNetClassifier
@@ -75,6 +84,8 @@ from keras_nlp.src.models.f_net.f_net_masked_lm_preprocessor import (
75
84
  )
76
85
  from keras_nlp.src.models.f_net.f_net_preprocessor import FNetPreprocessor
77
86
  from keras_nlp.src.models.f_net.f_net_tokenizer import FNetTokenizer
87
+ from keras_nlp.src.models.falcon.falcon_backbone import FalconBackbone
88
+ from keras_nlp.src.models.falcon.falcon_tokenizer import FalconTokenizer
78
89
  from keras_nlp.src.models.gemma.gemma_backbone import GemmaBackbone
79
90
  from keras_nlp.src.models.gemma.gemma_causal_lm import GemmaCausalLM
80
91
  from keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor import (
@@ -99,6 +110,13 @@ from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_preprocessor import (
99
110
  )
100
111
  from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
101
112
  from keras_nlp.src.models.llama.llama_backbone import LlamaBackbone
113
+ from keras_nlp.src.models.llama.llama_causal_lm import LlamaCausalLM
114
+ from keras_nlp.src.models.llama.llama_causal_lm_preprocessor import (
115
+ LlamaCausalLMPreprocessor,
116
+ )
117
+ from keras_nlp.src.models.llama.llama_preprocessor import LlamaPreprocessor
118
+ from keras_nlp.src.models.llama.llama_tokenizer import LlamaTokenizer
119
+ from keras_nlp.src.models.masked_lm import MaskedLM
102
120
  from keras_nlp.src.models.mistral.mistral_backbone import MistralBackbone
103
121
  from keras_nlp.src.models.mistral.mistral_causal_lm import MistralCausalLM
104
122
  from keras_nlp.src.models.mistral.mistral_causal_lm_preprocessor import (
@@ -113,6 +131,7 @@ from keras_nlp.src.models.opt.opt_causal_lm_preprocessor import (
113
131
  )
114
132
  from keras_nlp.src.models.opt.opt_preprocessor import OPTPreprocessor
115
133
  from keras_nlp.src.models.opt.opt_tokenizer import OPTTokenizer
134
+ from keras_nlp.src.models.preprocessor import Preprocessor
116
135
  from keras_nlp.src.models.roberta.roberta_backbone import RobertaBackbone
117
136
  from keras_nlp.src.models.roberta.roberta_classifier import RobertaClassifier
118
137
  from keras_nlp.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
@@ -121,8 +140,10 @@ from keras_nlp.src.models.roberta.roberta_masked_lm_preprocessor import (
121
140
  )
122
141
  from keras_nlp.src.models.roberta.roberta_preprocessor import RobertaPreprocessor
123
142
  from keras_nlp.src.models.roberta.roberta_tokenizer import RobertaTokenizer
143
+ from keras_nlp.src.models.seq_2_seq_lm import Seq2SeqLM
124
144
  from keras_nlp.src.models.t5.t5_backbone import T5Backbone
125
145
  from keras_nlp.src.models.t5.t5_tokenizer import T5Tokenizer
146
+ from keras_nlp.src.models.task import Task
126
147
  from keras_nlp.src.models.whisper.whisper_audio_feature_extractor import (
127
148
  WhisperAudioFeatureExtractor,
128
149
  )
@@ -146,4 +167,5 @@ from keras_nlp.src.models.xlm_roberta.xlm_roberta_tokenizer import (
146
167
  XLMRobertaTokenizer,
147
168
  )
148
169
  from keras_nlp.src.models.xlnet.xlnet_backbone import XLNetBackbone
170
+ from keras_nlp.src.tokenizers.tokenizer import Tokenizer
149
171
 
@@ -0,0 +1,21 @@
1
+ # Copyright 2023 The KerasNLP Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
16
+ from keras_nlp.src.models.albert.albert_presets import backbone_presets
17
+ from keras_nlp.src.models.albert.albert_tokenizer import AlbertTokenizer
18
+ from keras_nlp.src.utils.preset_utils import register_presets
19
+
20
+ register_presets(backbone_presets, (AlbertBackbone, AlbertTokenizer))
21
+
@@ -12,17 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import copy
16
-
17
15
  from keras_nlp.src.api_export import keras_nlp_export
18
16
  from keras_nlp.src.backend import keras
19
17
  from keras_nlp.src.layers.modeling.position_embedding import PositionEmbedding
20
18
  from keras_nlp.src.layers.modeling.reversible_embedding import ReversibleEmbedding
21
19
  from keras_nlp.src.layers.modeling.transformer_encoder import TransformerEncoder
22
- from keras_nlp.src.models.albert.albert_presets import backbone_presets
23
20
  from keras_nlp.src.models.backbone import Backbone
24
21
  from keras_nlp.src.utils.keras_utils import gelu_approximate
25
- from keras_nlp.src.utils.python_utils import classproperty
26
22
 
27
23
 
28
24
  def albert_kernel_initializer(stddev=0.02):
@@ -77,7 +73,7 @@ class AlbertBackbone(Backbone):
77
73
  such as softmax and layer normalization, will always be done at
78
74
  float32 precision regardless of dtype.
79
75
 
80
- Examples:
76
+ Example:
81
77
  ```python
82
78
  input_data = {
83
79
  "token_ids": np.ones(shape=(1, 12), dtype="int32"),
@@ -230,6 +226,7 @@ class AlbertBackbone(Backbone):
230
226
  "sequence_output": sequence_output,
231
227
  "pooled_output": pooled_output,
232
228
  },
229
+ dtype=dtype,
233
230
  **kwargs,
234
231
  )
235
232
 
@@ -266,7 +263,3 @@ class AlbertBackbone(Backbone):
266
263
  )
267
264
  return config
268
265
 
269
- @classproperty
270
- def presets(cls):
271
- return copy.deepcopy(backbone_presets)
272
-
@@ -12,20 +12,16 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import copy
16
-
17
15
  from keras_nlp.src.api_export import keras_nlp_export
18
16
  from keras_nlp.src.backend import keras
19
17
  from keras_nlp.src.models.albert.albert_backbone import AlbertBackbone
20
18
  from keras_nlp.src.models.albert.albert_backbone import albert_kernel_initializer
21
19
  from keras_nlp.src.models.albert.albert_preprocessor import AlbertPreprocessor
22
- from keras_nlp.src.models.albert.albert_presets import backbone_presets
23
- from keras_nlp.src.models.task import Task
24
- from keras_nlp.src.utils.python_utils import classproperty
20
+ from keras_nlp.src.models.classifier import Classifier
25
21
 
26
22
 
27
23
  @keras_nlp_export("keras_nlp.models.AlbertClassifier")
28
- class AlbertClassifier(Task):
24
+ class AlbertClassifier(Classifier):
29
25
  """An end-to-end ALBERT model for classification tasks
30
26
 
31
27
  This model attaches a classification head to a `keras_nlp.models.AlbertBackbone`
@@ -146,6 +142,9 @@ class AlbertClassifier(Task):
146
142
  ```
147
143
  """
148
144
 
145
+ backbone_cls = AlbertBackbone
146
+ preprocessor_cls = AlbertPreprocessor
147
+
149
148
  def __init__(
150
149
  self,
151
150
  backbone,
@@ -187,17 +186,6 @@ class AlbertClassifier(Task):
187
186
  self.activation = keras.activations.get(activation)
188
187
  self.dropout = dropout
189
188
 
190
- # === Default compilation ===
191
- logit_output = self.activation == keras.activations.linear
192
- self.compile(
193
- loss=keras.losses.SparseCategoricalCrossentropy(
194
- from_logits=logit_output
195
- ),
196
- optimizer=keras.optimizers.Adam(5e-5),
197
- metrics=[keras.metrics.SparseCategoricalAccuracy()],
198
- jit_compile=True,
199
- )
200
-
201
189
  def get_config(self):
202
190
  config = super().get_config()
203
191
  config.update(
@@ -210,15 +198,3 @@ class AlbertClassifier(Task):
210
198
 
211
199
  return config
212
200
 
213
- @classproperty
214
- def backbone_cls(cls):
215
- return AlbertBackbone
216
-
217
- @classproperty
218
- def preprocessor_cls(cls):
219
- return AlbertPreprocessor
220
-
221
- @classproperty
222
- def presets(cls):
223
- return copy.deepcopy({**backbone_presets})
224
-