keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. keras_hub/__init__.py +52 -0
  2. keras_hub/api/__init__.py +27 -0
  3. keras_hub/api/layers/__init__.py +47 -0
  4. keras_hub/api/metrics/__init__.py +24 -0
  5. keras_hub/api/models/__init__.py +249 -0
  6. keras_hub/api/samplers/__init__.py +29 -0
  7. keras_hub/api/tokenizers/__init__.py +35 -0
  8. keras_hub/src/__init__.py +13 -0
  9. keras_hub/src/api_export.py +53 -0
  10. keras_hub/src/layers/__init__.py +13 -0
  11. keras_hub/src/layers/modeling/__init__.py +13 -0
  12. keras_hub/src/layers/modeling/alibi_bias.py +143 -0
  13. keras_hub/src/layers/modeling/cached_multi_head_attention.py +137 -0
  14. keras_hub/src/layers/modeling/f_net_encoder.py +200 -0
  15. keras_hub/src/layers/modeling/masked_lm_head.py +239 -0
  16. keras_hub/src/layers/modeling/position_embedding.py +123 -0
  17. keras_hub/src/layers/modeling/reversible_embedding.py +311 -0
  18. keras_hub/src/layers/modeling/rotary_embedding.py +169 -0
  19. keras_hub/src/layers/modeling/sine_position_encoding.py +108 -0
  20. keras_hub/src/layers/modeling/token_and_position_embedding.py +150 -0
  21. keras_hub/src/layers/modeling/transformer_decoder.py +496 -0
  22. keras_hub/src/layers/modeling/transformer_encoder.py +262 -0
  23. keras_hub/src/layers/modeling/transformer_layer_utils.py +106 -0
  24. keras_hub/src/layers/preprocessing/__init__.py +13 -0
  25. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +220 -0
  26. keras_hub/src/layers/preprocessing/multi_segment_packer.py +319 -0
  27. keras_hub/src/layers/preprocessing/preprocessing_layer.py +62 -0
  28. keras_hub/src/layers/preprocessing/random_deletion.py +271 -0
  29. keras_hub/src/layers/preprocessing/random_swap.py +267 -0
  30. keras_hub/src/layers/preprocessing/start_end_packer.py +219 -0
  31. keras_hub/src/metrics/__init__.py +13 -0
  32. keras_hub/src/metrics/bleu.py +394 -0
  33. keras_hub/src/metrics/edit_distance.py +197 -0
  34. keras_hub/src/metrics/perplexity.py +181 -0
  35. keras_hub/src/metrics/rouge_base.py +204 -0
  36. keras_hub/src/metrics/rouge_l.py +97 -0
  37. keras_hub/src/metrics/rouge_n.py +125 -0
  38. keras_hub/src/models/__init__.py +13 -0
  39. keras_hub/src/models/albert/__init__.py +20 -0
  40. keras_hub/src/models/albert/albert_backbone.py +267 -0
  41. keras_hub/src/models/albert/albert_classifier.py +202 -0
  42. keras_hub/src/models/albert/albert_masked_lm.py +129 -0
  43. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +194 -0
  44. keras_hub/src/models/albert/albert_preprocessor.py +206 -0
  45. keras_hub/src/models/albert/albert_presets.py +70 -0
  46. keras_hub/src/models/albert/albert_tokenizer.py +119 -0
  47. keras_hub/src/models/backbone.py +311 -0
  48. keras_hub/src/models/bart/__init__.py +20 -0
  49. keras_hub/src/models/bart/bart_backbone.py +261 -0
  50. keras_hub/src/models/bart/bart_preprocessor.py +276 -0
  51. keras_hub/src/models/bart/bart_presets.py +74 -0
  52. keras_hub/src/models/bart/bart_seq_2_seq_lm.py +490 -0
  53. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +262 -0
  54. keras_hub/src/models/bart/bart_tokenizer.py +124 -0
  55. keras_hub/src/models/bert/__init__.py +23 -0
  56. keras_hub/src/models/bert/bert_backbone.py +227 -0
  57. keras_hub/src/models/bert/bert_classifier.py +183 -0
  58. keras_hub/src/models/bert/bert_masked_lm.py +131 -0
  59. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +198 -0
  60. keras_hub/src/models/bert/bert_preprocessor.py +184 -0
  61. keras_hub/src/models/bert/bert_presets.py +147 -0
  62. keras_hub/src/models/bert/bert_tokenizer.py +112 -0
  63. keras_hub/src/models/bloom/__init__.py +20 -0
  64. keras_hub/src/models/bloom/bloom_attention.py +186 -0
  65. keras_hub/src/models/bloom/bloom_backbone.py +173 -0
  66. keras_hub/src/models/bloom/bloom_causal_lm.py +298 -0
  67. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +176 -0
  68. keras_hub/src/models/bloom/bloom_decoder.py +206 -0
  69. keras_hub/src/models/bloom/bloom_preprocessor.py +185 -0
  70. keras_hub/src/models/bloom/bloom_presets.py +121 -0
  71. keras_hub/src/models/bloom/bloom_tokenizer.py +116 -0
  72. keras_hub/src/models/causal_lm.py +383 -0
  73. keras_hub/src/models/classifier.py +109 -0
  74. keras_hub/src/models/csp_darknet/__init__.py +13 -0
  75. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +410 -0
  76. keras_hub/src/models/csp_darknet/csp_darknet_image_classifier.py +133 -0
  77. keras_hub/src/models/deberta_v3/__init__.py +24 -0
  78. keras_hub/src/models/deberta_v3/deberta_v3_backbone.py +210 -0
  79. keras_hub/src/models/deberta_v3/deberta_v3_classifier.py +228 -0
  80. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm.py +135 -0
  81. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +191 -0
  82. keras_hub/src/models/deberta_v3/deberta_v3_preprocessor.py +206 -0
  83. keras_hub/src/models/deberta_v3/deberta_v3_presets.py +82 -0
  84. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +155 -0
  85. keras_hub/src/models/deberta_v3/disentangled_attention_encoder.py +227 -0
  86. keras_hub/src/models/deberta_v3/disentangled_self_attention.py +412 -0
  87. keras_hub/src/models/deberta_v3/relative_embedding.py +94 -0
  88. keras_hub/src/models/densenet/__init__.py +13 -0
  89. keras_hub/src/models/densenet/densenet_backbone.py +210 -0
  90. keras_hub/src/models/densenet/densenet_image_classifier.py +131 -0
  91. keras_hub/src/models/distil_bert/__init__.py +26 -0
  92. keras_hub/src/models/distil_bert/distil_bert_backbone.py +187 -0
  93. keras_hub/src/models/distil_bert/distil_bert_classifier.py +208 -0
  94. keras_hub/src/models/distil_bert/distil_bert_masked_lm.py +137 -0
  95. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +194 -0
  96. keras_hub/src/models/distil_bert/distil_bert_preprocessor.py +175 -0
  97. keras_hub/src/models/distil_bert/distil_bert_presets.py +57 -0
  98. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +114 -0
  99. keras_hub/src/models/electra/__init__.py +20 -0
  100. keras_hub/src/models/electra/electra_backbone.py +247 -0
  101. keras_hub/src/models/electra/electra_preprocessor.py +154 -0
  102. keras_hub/src/models/electra/electra_presets.py +95 -0
  103. keras_hub/src/models/electra/electra_tokenizer.py +104 -0
  104. keras_hub/src/models/f_net/__init__.py +20 -0
  105. keras_hub/src/models/f_net/f_net_backbone.py +236 -0
  106. keras_hub/src/models/f_net/f_net_classifier.py +154 -0
  107. keras_hub/src/models/f_net/f_net_masked_lm.py +132 -0
  108. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +196 -0
  109. keras_hub/src/models/f_net/f_net_preprocessor.py +177 -0
  110. keras_hub/src/models/f_net/f_net_presets.py +43 -0
  111. keras_hub/src/models/f_net/f_net_tokenizer.py +95 -0
  112. keras_hub/src/models/falcon/__init__.py +20 -0
  113. keras_hub/src/models/falcon/falcon_attention.py +156 -0
  114. keras_hub/src/models/falcon/falcon_backbone.py +164 -0
  115. keras_hub/src/models/falcon/falcon_causal_lm.py +291 -0
  116. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +173 -0
  117. keras_hub/src/models/falcon/falcon_preprocessor.py +187 -0
  118. keras_hub/src/models/falcon/falcon_presets.py +30 -0
  119. keras_hub/src/models/falcon/falcon_tokenizer.py +110 -0
  120. keras_hub/src/models/falcon/falcon_transformer_decoder.py +255 -0
  121. keras_hub/src/models/feature_pyramid_backbone.py +73 -0
  122. keras_hub/src/models/gemma/__init__.py +20 -0
  123. keras_hub/src/models/gemma/gemma_attention.py +250 -0
  124. keras_hub/src/models/gemma/gemma_backbone.py +316 -0
  125. keras_hub/src/models/gemma/gemma_causal_lm.py +448 -0
  126. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +167 -0
  127. keras_hub/src/models/gemma/gemma_decoder_block.py +241 -0
  128. keras_hub/src/models/gemma/gemma_preprocessor.py +191 -0
  129. keras_hub/src/models/gemma/gemma_presets.py +248 -0
  130. keras_hub/src/models/gemma/gemma_tokenizer.py +103 -0
  131. keras_hub/src/models/gemma/rms_normalization.py +40 -0
  132. keras_hub/src/models/gpt2/__init__.py +20 -0
  133. keras_hub/src/models/gpt2/gpt2_backbone.py +199 -0
  134. keras_hub/src/models/gpt2/gpt2_causal_lm.py +437 -0
  135. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +173 -0
  136. keras_hub/src/models/gpt2/gpt2_preprocessor.py +187 -0
  137. keras_hub/src/models/gpt2/gpt2_presets.py +82 -0
  138. keras_hub/src/models/gpt2/gpt2_tokenizer.py +110 -0
  139. keras_hub/src/models/gpt_neo_x/__init__.py +13 -0
  140. keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py +251 -0
  141. keras_hub/src/models/gpt_neo_x/gpt_neo_x_backbone.py +175 -0
  142. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm.py +201 -0
  143. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +141 -0
  144. keras_hub/src/models/gpt_neo_x/gpt_neo_x_decoder.py +258 -0
  145. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +145 -0
  146. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +88 -0
  147. keras_hub/src/models/image_classifier.py +90 -0
  148. keras_hub/src/models/llama/__init__.py +20 -0
  149. keras_hub/src/models/llama/llama_attention.py +225 -0
  150. keras_hub/src/models/llama/llama_backbone.py +188 -0
  151. keras_hub/src/models/llama/llama_causal_lm.py +327 -0
  152. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +170 -0
  153. keras_hub/src/models/llama/llama_decoder.py +246 -0
  154. keras_hub/src/models/llama/llama_layernorm.py +48 -0
  155. keras_hub/src/models/llama/llama_preprocessor.py +189 -0
  156. keras_hub/src/models/llama/llama_presets.py +80 -0
  157. keras_hub/src/models/llama/llama_tokenizer.py +84 -0
  158. keras_hub/src/models/llama3/__init__.py +20 -0
  159. keras_hub/src/models/llama3/llama3_backbone.py +84 -0
  160. keras_hub/src/models/llama3/llama3_causal_lm.py +46 -0
  161. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +173 -0
  162. keras_hub/src/models/llama3/llama3_preprocessor.py +21 -0
  163. keras_hub/src/models/llama3/llama3_presets.py +69 -0
  164. keras_hub/src/models/llama3/llama3_tokenizer.py +63 -0
  165. keras_hub/src/models/masked_lm.py +101 -0
  166. keras_hub/src/models/mistral/__init__.py +20 -0
  167. keras_hub/src/models/mistral/mistral_attention.py +238 -0
  168. keras_hub/src/models/mistral/mistral_backbone.py +203 -0
  169. keras_hub/src/models/mistral/mistral_causal_lm.py +328 -0
  170. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +175 -0
  171. keras_hub/src/models/mistral/mistral_layer_norm.py +48 -0
  172. keras_hub/src/models/mistral/mistral_preprocessor.py +190 -0
  173. keras_hub/src/models/mistral/mistral_presets.py +48 -0
  174. keras_hub/src/models/mistral/mistral_tokenizer.py +82 -0
  175. keras_hub/src/models/mistral/mistral_transformer_decoder.py +265 -0
  176. keras_hub/src/models/mix_transformer/__init__.py +13 -0
  177. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +181 -0
  178. keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +133 -0
  179. keras_hub/src/models/mix_transformer/mix_transformer_layers.py +300 -0
  180. keras_hub/src/models/opt/__init__.py +20 -0
  181. keras_hub/src/models/opt/opt_backbone.py +173 -0
  182. keras_hub/src/models/opt/opt_causal_lm.py +301 -0
  183. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +177 -0
  184. keras_hub/src/models/opt/opt_preprocessor.py +188 -0
  185. keras_hub/src/models/opt/opt_presets.py +72 -0
  186. keras_hub/src/models/opt/opt_tokenizer.py +116 -0
  187. keras_hub/src/models/pali_gemma/__init__.py +23 -0
  188. keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +277 -0
  189. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py +313 -0
  190. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +147 -0
  191. keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +160 -0
  192. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +78 -0
  193. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +79 -0
  194. keras_hub/src/models/pali_gemma/pali_gemma_vit.py +566 -0
  195. keras_hub/src/models/phi3/__init__.py +20 -0
  196. keras_hub/src/models/phi3/phi3_attention.py +260 -0
  197. keras_hub/src/models/phi3/phi3_backbone.py +224 -0
  198. keras_hub/src/models/phi3/phi3_causal_lm.py +218 -0
  199. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +173 -0
  200. keras_hub/src/models/phi3/phi3_decoder.py +260 -0
  201. keras_hub/src/models/phi3/phi3_layernorm.py +48 -0
  202. keras_hub/src/models/phi3/phi3_preprocessor.py +190 -0
  203. keras_hub/src/models/phi3/phi3_presets.py +50 -0
  204. keras_hub/src/models/phi3/phi3_rotary_embedding.py +137 -0
  205. keras_hub/src/models/phi3/phi3_tokenizer.py +94 -0
  206. keras_hub/src/models/preprocessor.py +207 -0
  207. keras_hub/src/models/resnet/__init__.py +13 -0
  208. keras_hub/src/models/resnet/resnet_backbone.py +612 -0
  209. keras_hub/src/models/resnet/resnet_image_classifier.py +136 -0
  210. keras_hub/src/models/roberta/__init__.py +20 -0
  211. keras_hub/src/models/roberta/roberta_backbone.py +184 -0
  212. keras_hub/src/models/roberta/roberta_classifier.py +209 -0
  213. keras_hub/src/models/roberta/roberta_masked_lm.py +136 -0
  214. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +198 -0
  215. keras_hub/src/models/roberta/roberta_preprocessor.py +192 -0
  216. keras_hub/src/models/roberta/roberta_presets.py +43 -0
  217. keras_hub/src/models/roberta/roberta_tokenizer.py +132 -0
  218. keras_hub/src/models/seq_2_seq_lm.py +54 -0
  219. keras_hub/src/models/t5/__init__.py +20 -0
  220. keras_hub/src/models/t5/t5_backbone.py +261 -0
  221. keras_hub/src/models/t5/t5_layer_norm.py +35 -0
  222. keras_hub/src/models/t5/t5_multi_head_attention.py +324 -0
  223. keras_hub/src/models/t5/t5_presets.py +95 -0
  224. keras_hub/src/models/t5/t5_tokenizer.py +100 -0
  225. keras_hub/src/models/t5/t5_transformer_layer.py +178 -0
  226. keras_hub/src/models/task.py +419 -0
  227. keras_hub/src/models/vgg/__init__.py +13 -0
  228. keras_hub/src/models/vgg/vgg_backbone.py +158 -0
  229. keras_hub/src/models/vgg/vgg_image_classifier.py +124 -0
  230. keras_hub/src/models/vit_det/__init__.py +13 -0
  231. keras_hub/src/models/vit_det/vit_det_backbone.py +204 -0
  232. keras_hub/src/models/vit_det/vit_layers.py +565 -0
  233. keras_hub/src/models/whisper/__init__.py +20 -0
  234. keras_hub/src/models/whisper/whisper_audio_feature_extractor.py +260 -0
  235. keras_hub/src/models/whisper/whisper_backbone.py +305 -0
  236. keras_hub/src/models/whisper/whisper_cached_multi_head_attention.py +153 -0
  237. keras_hub/src/models/whisper/whisper_decoder.py +141 -0
  238. keras_hub/src/models/whisper/whisper_encoder.py +106 -0
  239. keras_hub/src/models/whisper/whisper_preprocessor.py +326 -0
  240. keras_hub/src/models/whisper/whisper_presets.py +148 -0
  241. keras_hub/src/models/whisper/whisper_tokenizer.py +163 -0
  242. keras_hub/src/models/xlm_roberta/__init__.py +26 -0
  243. keras_hub/src/models/xlm_roberta/xlm_roberta_backbone.py +81 -0
  244. keras_hub/src/models/xlm_roberta/xlm_roberta_classifier.py +225 -0
  245. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm.py +141 -0
  246. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +195 -0
  247. keras_hub/src/models/xlm_roberta/xlm_roberta_preprocessor.py +205 -0
  248. keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +43 -0
  249. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +191 -0
  250. keras_hub/src/models/xlnet/__init__.py +13 -0
  251. keras_hub/src/models/xlnet/relative_attention.py +459 -0
  252. keras_hub/src/models/xlnet/xlnet_backbone.py +222 -0
  253. keras_hub/src/models/xlnet/xlnet_content_and_query_embedding.py +133 -0
  254. keras_hub/src/models/xlnet/xlnet_encoder.py +378 -0
  255. keras_hub/src/samplers/__init__.py +13 -0
  256. keras_hub/src/samplers/beam_sampler.py +207 -0
  257. keras_hub/src/samplers/contrastive_sampler.py +231 -0
  258. keras_hub/src/samplers/greedy_sampler.py +50 -0
  259. keras_hub/src/samplers/random_sampler.py +77 -0
  260. keras_hub/src/samplers/sampler.py +237 -0
  261. keras_hub/src/samplers/serialization.py +97 -0
  262. keras_hub/src/samplers/top_k_sampler.py +92 -0
  263. keras_hub/src/samplers/top_p_sampler.py +113 -0
  264. keras_hub/src/tests/__init__.py +13 -0
  265. keras_hub/src/tests/test_case.py +608 -0
  266. keras_hub/src/tokenizers/__init__.py +13 -0
  267. keras_hub/src/tokenizers/byte_pair_tokenizer.py +638 -0
  268. keras_hub/src/tokenizers/byte_tokenizer.py +299 -0
  269. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +267 -0
  270. keras_hub/src/tokenizers/sentence_piece_tokenizer_trainer.py +150 -0
  271. keras_hub/src/tokenizers/tokenizer.py +235 -0
  272. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +355 -0
  273. keras_hub/src/tokenizers/word_piece_tokenizer.py +544 -0
  274. keras_hub/src/tokenizers/word_piece_tokenizer_trainer.py +176 -0
  275. keras_hub/src/utils/__init__.py +13 -0
  276. keras_hub/src/utils/keras_utils.py +130 -0
  277. keras_hub/src/utils/pipeline_model.py +293 -0
  278. keras_hub/src/utils/preset_utils.py +621 -0
  279. keras_hub/src/utils/python_utils.py +21 -0
  280. keras_hub/src/utils/tensor_utils.py +206 -0
  281. keras_hub/src/utils/timm/__init__.py +13 -0
  282. keras_hub/src/utils/timm/convert.py +37 -0
  283. keras_hub/src/utils/timm/convert_resnet.py +171 -0
  284. keras_hub/src/utils/transformers/__init__.py +13 -0
  285. keras_hub/src/utils/transformers/convert.py +101 -0
  286. keras_hub/src/utils/transformers/convert_bert.py +173 -0
  287. keras_hub/src/utils/transformers/convert_distilbert.py +184 -0
  288. keras_hub/src/utils/transformers/convert_gemma.py +187 -0
  289. keras_hub/src/utils/transformers/convert_gpt2.py +186 -0
  290. keras_hub/src/utils/transformers/convert_llama3.py +136 -0
  291. keras_hub/src/utils/transformers/convert_pali_gemma.py +303 -0
  292. keras_hub/src/utils/transformers/safetensor_utils.py +97 -0
  293. keras_hub/src/version_utils.py +23 -0
  294. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +34 -0
  295. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +297 -0
  296. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/WHEEL +5 -0
  297. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/top_level.txt +1 -0
@@ -0,0 +1,43 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """FNet model preset configurations."""
15
+
# Both FNet presets share everything except their name, depth and parameter
# count, so the registry is generated from a small table of those values.
backbone_presets = {
    preset_name: {
        "metadata": {
            "description": (
                f"{num_layers}-layer FNet model where case is maintained. "
                "Trained on the C4 dataset."
            ),
            "params": num_params,
            "official_name": "FNet",
            "path": "f_net",
            "model_card": "https://github.com/google-research/google-research/blob/master/f_net/README.md",
        },
        "kaggle_handle": f"kaggle://keras/f_net/keras/{preset_name}/2",
    }
    for preset_name, num_layers, num_params in (
        ("f_net_base_en", 12, 82861056),
        ("f_net_large_en", 24, 236945408),
    )
}
@@ -0,0 +1,95 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from keras_hub.src.api_export import keras_hub_export
17
+ from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
18
+ SentencePieceTokenizer,
19
+ )
20
+
21
+
@keras_hub_export("keras_hub.models.FNetTokenizer")
class FNetTokenizer(SentencePieceTokenizer):
    """FNet tokenizer layer based on SentencePiece.

    This tokenizer class will tokenize raw strings into integer sequences and
    is based on `keras_hub.tokenizers.SentencePieceTokenizer`. Unlike the
    underlying tokenizer, it will check for all special tokens needed by
    FNet models and provides a `from_preset()` method to automatically
    download a matching vocabulary for a FNet preset.

    This tokenizer does not provide truncation or padding of inputs. It can be
    combined with a `keras_hub.models.FNetPreprocessor` layer for input
    packing.

    If input is a batch of strings (rank > 0), the layer will output a
    `tf.RaggedTensor` where the last dimension of the output is ragged.

    If input is a scalar string (rank == 0), the layer will output a dense
    `tf.Tensor` with static shape `[None]`.

    Args:
        proto: Either a `string` path to a SentencePiece proto file, or a
            `bytes` object with a serialized SentencePiece proto. See the
            [SentencePiece repository](https://github.com/google/sentencepiece)
            for more details on the format.

    Raises:
        ValueError: If a non-`None` `proto` is set whose vocabulary is missing
            any of the `[CLS]`, `[SEP]`, `<pad>` or `[MASK]` tokens.

    Examples:
    ```python
    # Unbatched input.
    tokenizer = keras_hub.models.FNetTokenizer.from_preset(
        "f_net_base_en",
    )
    tokenizer("The quick brown fox jumped.")

    # Batched input.
    tokenizer(["The quick brown fox jumped.", "The fox slept."])

    # Detokenization.
    tokenizer.detokenize(tokenizer("The quick brown fox jumped."))
    ```
    """

    def __init__(self, proto, **kwargs):
        # The special-token strings are assigned before delegating to the
        # parent constructor so they already exist whenever `set_proto()`
        # runs.
        self.cls_token = "[CLS]"
        self.sep_token = "[SEP]"
        self.pad_token = "<pad>"
        self.mask_token = "[MASK]"
        super().__init__(proto=proto, **kwargs)

    def set_proto(self, proto):
        """Set the SentencePiece proto and resolve the special token ids.

        With `proto=None` every `*_token_id` attribute is reset to `None`.
        Otherwise each special token must be present in the vocabulary, and
        its id is stored on the matching `*_token_id` attribute.
        """
        super().set_proto(proto)
        if proto is None:
            self.cls_token_id = None
            self.sep_token_id = None
            self.pad_token_id = None
            self.mask_token_id = None
            return

        # Fetch the vocabulary once instead of once per special token; a set
        # keeps each membership test O(1).
        vocabulary = set(self.get_vocabulary())
        for token in (
            self.cls_token,
            self.sep_token,
            self.pad_token,
            self.mask_token,
        ):
            if token not in vocabulary:
                raise ValueError(
                    f"Cannot find token `'{token}'` in the provided "
                    f"`vocabulary`. Please provide `'{token}'` in your "
                    "`vocabulary` or use a pretrained `vocabulary` name."
                )

        self.cls_token_id = self.token_to_id(self.cls_token)
        self.sep_token_id = self.token_to_id(self.sep_token)
        self.pad_token_id = self.token_to_id(self.pad_token)
        self.mask_token_id = self.token_to_id(self.mask_token)
@@ -0,0 +1,20 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone
16
+ from keras_hub.src.models.falcon.falcon_presets import backbone_presets
17
+ from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer
18
+ from keras_hub.src.utils.preset_utils import register_presets
19
+
# Register the Falcon presets for the backbone and tokenizer classes
# (presumably what makes them resolvable via `from_preset()` - confirm
# against `preset_utils.register_presets`).
register_presets(
    backbone_presets,
    (FalconBackbone, FalconTokenizer),
)
@@ -0,0 +1,156 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import math
15
+
16
+ import keras
17
+ from keras import ops
18
+
19
+
class FalconAttention(keras.layers.Layer):
    """Multi-head self-attention layer with an additive bias on the scores.

    Queries, keys and values are all projected from the same `inputs`. The
    `alibi` tensor is added to the raw query/key scores, the sum is scaled by
    `1 / sqrt(head_dim)`, and a float32 softmax (optionally masked) turns the
    scores into attention weights.

    Args:
        num_heads: int. Number of attention heads. `head_dim` is computed as
            `hidden_dim // num_heads` when the layer is built.
        attention_dropout_rate: float. Dropout rate applied to the attention
            weights after the softmax.
        **kwargs: Forwarded to `keras.layers.Layer`.

    Call arguments:
        inputs: Rank-3 tensor `[batch_size, seq_length, hidden_dim]`.
        alibi: Bias added to the attention scores, which have shape
            `[batch_size, num_heads, query_length, kv_length]`. Presumably an
            ALiBi positional bias broadcastable to that shape - confirm with
            the caller.
        attention_mask: Optional mask. When given, a head axis is inserted at
            position 1 and it is passed to the softmax as its mask. When
            `None`, the softmax is applied unmasked.
        cache: Optional tensor holding previously computed keys at
            `cache[:, 0, ...]` and values at `cache[:, 1, ...]`.
        cache_update_index: Optional index along the key/value length axis at
            which the freshly computed keys/values are written into `cache`.
            When `cache` is given and this is `None`, the cached keys/values
            are used as-is.

    Returns:
        `attention_output` with shape `[batch_size, seq_length, hidden_dim]`,
        or the tuple `(attention_output, cache)` when `cache` was provided.

    Raises:
        ValueError: If `cache_update_index` is set while `cache` is `None`.
    """

    def __init__(
        self,
        num_heads,
        attention_dropout_rate,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.attention_dropout_rate = attention_dropout_rate

    def build(self, inputs_shape):
        # Einsum variables:
        # b = batch size
        # q = query length
        # m = model dim
        # n = num attention heads
        # h = head dim
        # k = key/value length

        # Only the model dim is needed here; the unpacking still requires a
        # rank-3 shape.
        _, _, hidden_dim = inputs_shape

        self.head_dim = hidden_dim // self.num_heads

        # Layer-wise attention scaling
        self.inv_norm_factor = 1.0 / math.sqrt(self.head_dim)

        self.query_dense = keras.layers.EinsumDense(
            equation="bqm,mnh->bqnh",
            output_shape=(None, self.num_heads, self.head_dim),
            bias_axes="nh",
            dtype=self.dtype_policy,
            name="query_dense",
        )
        self.query_dense.build(inputs_shape)

        self.key_dense = keras.layers.EinsumDense(
            equation="bkm,mnh->bknh",
            output_shape=(None, self.num_heads, self.head_dim),
            bias_axes="nh",
            dtype=self.dtype_policy,
            name="key_dense",
        )
        self.key_dense.build(inputs_shape)

        self.value_dense = keras.layers.EinsumDense(
            equation="bkm,mnh->bknh",
            output_shape=(None, self.num_heads, self.head_dim),
            bias_axes="nh",
            dtype=self.dtype_policy,
            name="value_dense",
        )
        self.value_dense.build(inputs_shape)

        self.attention_dropout = keras.layers.Dropout(
            rate=self.attention_dropout_rate,
            dtype=self.dtype_policy,
            name="attention_dropout",
        )

        self.output_dense = keras.layers.Dense(
            hidden_dim,
            dtype=self.dtype_policy,
            name="output_dense",
        )
        self.output_dense.build(inputs_shape)

        # The softmax always runs in float32, independent of the layer's
        # dtype policy.
        self.softmax = keras.layers.Softmax(dtype="float32", name="softmax")

        self.built = True

    def call(
        self,
        inputs,
        alibi,
        attention_mask=None,
        cache=None,
        cache_update_index=None,
    ):
        batch_size, seq_length, _ = ops.shape(inputs)

        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        if cache is not None:
            key_cache = cache[:, 0, ...]
            value_cache = cache[:, 1, ...]
            if cache_update_index is None:
                # Read-only use of the cache: discard the fresh projections.
                key = key_cache
                value = value_cache
            else:
                # Write the new keys/values into the cache at the given
                # position along the key/value length axis.
                start = [0, cache_update_index, 0, 0]
                key = ops.slice_update(key_cache, start, key)
                value = ops.slice_update(value_cache, start, value)
                cache = ops.stack((key, value), axis=1)
        else:
            if cache_update_index is not None:
                raise ValueError(
                    "`cache_update_index` should not be set if `cache` is "
                    f"`None`. Received: cache={cache}, "
                    f"cache_update_index={cache_update_index}"
                )

        attention_scores = ops.einsum("bqnh,bknh->bnqk", query, key)
        attention_scores = ops.add(attention_scores, alibi)
        attention_scores = (
            attention_scores * self.inv_norm_factor
        )  # [batch_size, num_heads, query_length, kv_length]

        # `attention_mask` defaults to `None`; only insert the head axis when
        # a mask was actually supplied, otherwise run an unmasked softmax.
        softmax_mask = (
            None
            if attention_mask is None
            else ops.expand_dims(attention_mask, 1)
        )
        attention_scores = self.softmax(attention_scores, softmax_mask)
        attention_scores = self.attention_dropout(attention_scores)
        attention_output = ops.einsum(
            "bnqk,bknh->bqnh", attention_scores, value
        )
        attention_output = ops.reshape(
            attention_output,
            [batch_size, seq_length, self.num_heads * self.head_dim],
        )  # [batch_size, query_length, hidden_dim]

        attention_output = self.output_dense(attention_output)

        if cache is not None:
            return attention_output, cache

        return attention_output

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "num_heads": self.num_heads,
                "attention_dropout_rate": self.attention_dropout_rate,
            }
        )
        return config
@@ -0,0 +1,164 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import keras
15
+
16
+ from keras_hub.src.api_export import keras_hub_export
17
+ from keras_hub.src.layers.modeling.reversible_embedding import (
18
+ ReversibleEmbedding,
19
+ )
20
+ from keras_hub.src.models.backbone import Backbone
21
+ from keras_hub.src.models.falcon.falcon_transformer_decoder import (
22
+ FalconTransformerDecoder,
23
+ )
24
+
25
+
@keras_hub_export("keras_hub.models.FalconBackbone")
class FalconBackbone(Backbone):
    """The Falcon core architecture.

    A Transformer-based, decoder-only network as described in
    [Falcon](https://arxiv.org/abs/2306.01116). The model embeds the input
    token ids, runs them through a stack of `FalconTransformerDecoder`
    blocks and applies a final layer normalization.

    Args:
        vocabulary_size: int. The size of the token vocabulary.
        num_layers: int. The number of transformer layers.
        num_attention_heads: int. The number of attention heads for each
            transformer. The hidden size must be divisible by the number of
            attention heads.
        hidden_dim: int. The dimensionality of the embeddings and hidden
            states.
        intermediate_dim: int. The output dimension of the first Dense layer
            in the MLP network of each transformer.
        layer_norm_epsilon: float. Epsilon for the layer normalization layers
            in the transformer decoder.
        attention_dropout_rate: float. Dropout probability for the attention.
        feedforward_dropout_rate: float. Dropout probability for the
            feedforward.
        dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
            for model computations and weights. Note that some computations,
            such as softmax and layer normalization, will always be done at
            float32 precision regardless of dtype.

    Examples:
    ```python
    input_data = {
        "token_ids": np.ones(shape=(1, 12), dtype="int32"),
        "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]),
    }

    # Pretrained Falcon decoder.
    # TODO: Update the preset.
    model = keras_hub.models.FalconBackbone.from_preset("falcon_preset")
    model(input_data)

    # Randomly initialized Falcon decoder with a custom config.
    model = keras_hub.models.FalconBackbone(
        vocabulary_size=10,
        num_layers=2,
        num_attention_heads=2,
        hidden_dim=32,
        intermediate_dim=32*4,
        layer_norm_epsilon=1e-5,
        attention_dropout_rate=0,
        feedforward_dropout_rate=0,
        dtype="float32",
    )
    model(input_data)
    ```
    """

    def __init__(
        self,
        vocabulary_size,
        num_layers,
        num_attention_heads,
        hidden_dim,
        intermediate_dim,
        layer_norm_epsilon=1e-5,
        attention_dropout_rate=0,
        feedforward_dropout_rate=0,
        dtype=None,
        **kwargs,
    ):
        # === Layers ===
        self.token_embedding = ReversibleEmbedding(
            input_dim=vocabulary_size,
            output_dim=hidden_dim,
            dtype=dtype,
            name="token_embedding",
        )

        # NOTE(review): `layer_norm_epsilon` is only applied to
        # `final_layernorm` below; it is not forwarded to the decoder
        # blocks. Confirm whether that is intended.
        self.transformer_layers = []
        for layer_index in range(num_layers):
            self.transformer_layers.append(
                FalconTransformerDecoder(
                    num_attention_heads=num_attention_heads,
                    intermediate_dim=intermediate_dim,
                    attention_dropout_rate=attention_dropout_rate,
                    feedforward_dropout_rate=feedforward_dropout_rate,
                    dtype=dtype,
                    name=f"transformer_layer_{layer_index}",
                )
            )

        self.final_layernorm = keras.layers.LayerNormalization(
            epsilon=layer_norm_epsilon,
            dtype=dtype,
            name="final_layernorm",
        )

        # === Functional Model ===
        token_ids = keras.Input(shape=(None,), dtype="int32", name="token_ids")
        padding_mask = keras.Input(
            shape=(None,), dtype="int32", name="padding_mask"
        )

        # Embed the tokens, then thread the hidden states through every
        # decoder block in order before the final normalization.
        hidden_states = self.token_embedding(token_ids)
        for decoder_block in self.transformer_layers:
            hidden_states = decoder_block(
                inputs=hidden_states, decoder_padding_mask=padding_mask
            )
        sequence_output = self.final_layernorm(hidden_states)

        model_inputs = {
            "token_ids": token_ids,
            "padding_mask": padding_mask,
        }
        super().__init__(
            inputs=model_inputs,
            outputs=sequence_output,
            dtype=dtype,
            **kwargs,
        )

        # === Config ===
        self.vocabulary_size = vocabulary_size
        self.num_layers = num_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_dim = hidden_dim
        self.intermediate_dim = intermediate_dim
        self.attention_dropout_rate = attention_dropout_rate
        self.feedforward_dropout_rate = feedforward_dropout_rate
        self.layer_norm_epsilon = layer_norm_epsilon

    def get_config(self):
        """Return the base config extended with this backbone's arguments."""
        config = super().get_config()
        config.update(
            vocabulary_size=self.vocabulary_size,
            num_layers=self.num_layers,
            num_attention_heads=self.num_attention_heads,
            hidden_dim=self.hidden_dim,
            intermediate_dim=self.intermediate_dim,
            attention_dropout_rate=self.attention_dropout_rate,
            feedforward_dropout_rate=self.feedforward_dropout_rate,
            layer_norm_epsilon=self.layer_norm_epsilon,
        )
        return config