keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. keras_hub/__init__.py +0 -6
  2. keras_hub/api/__init__.py +2 -0
  3. keras_hub/api/bounding_box/__init__.py +36 -0
  4. keras_hub/api/layers/__init__.py +14 -0
  5. keras_hub/api/models/__init__.py +97 -48
  6. keras_hub/api/tokenizers/__init__.py +30 -0
  7. keras_hub/api/utils/__init__.py +22 -0
  8. keras_hub/src/api_export.py +15 -9
  9. keras_hub/src/bounding_box/__init__.py +13 -0
  10. keras_hub/src/bounding_box/converters.py +529 -0
  11. keras_hub/src/bounding_box/formats.py +162 -0
  12. keras_hub/src/bounding_box/iou.py +263 -0
  13. keras_hub/src/bounding_box/to_dense.py +95 -0
  14. keras_hub/src/bounding_box/to_ragged.py +99 -0
  15. keras_hub/src/bounding_box/utils.py +194 -0
  16. keras_hub/src/bounding_box/validate_format.py +99 -0
  17. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  18. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  19. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  20. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  21. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  22. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  23. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  24. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  25. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  26. keras_hub/src/models/albert/__init__.py +1 -2
  27. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  28. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
  29. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  30. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  31. keras_hub/src/models/backbone.py +12 -34
  32. keras_hub/src/models/bart/__init__.py +1 -2
  33. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  34. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  35. keras_hub/src/models/bert/__init__.py +1 -5
  36. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  37. keras_hub/src/models/bert/bert_presets.py +1 -4
  38. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
  39. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  40. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  41. keras_hub/src/models/bloom/__init__.py +1 -2
  42. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  43. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  44. keras_hub/src/models/causal_lm.py +10 -29
  45. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  46. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  47. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  48. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  49. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
  50. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  51. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  52. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  53. keras_hub/src/models/distil_bert/__init__.py +1 -4
  54. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  55. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
  56. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  57. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  58. keras_hub/src/models/efficientnet/__init__.py +13 -0
  59. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  60. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  61. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  62. keras_hub/src/models/electra/__init__.py +1 -2
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  72. keras_hub/src/models/gemma/__init__.py +1 -2
  73. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  74. keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
  75. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  76. keras_hub/src/models/gpt2/__init__.py +1 -2
  77. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  78. keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
  79. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  80. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  82. keras_hub/src/models/image_classifier.py +0 -5
  83. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  84. keras_hub/src/models/llama/__init__.py +1 -2
  85. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  86. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  87. keras_hub/src/models/llama3/__init__.py +1 -2
  88. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  89. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  90. keras_hub/src/models/masked_lm.py +0 -2
  91. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  92. keras_hub/src/models/mistral/__init__.py +1 -2
  93. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  94. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  95. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  96. keras_hub/src/models/mobilenet/__init__.py +13 -0
  97. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  98. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  99. keras_hub/src/models/opt/__init__.py +1 -2
  100. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  101. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  102. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  103. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  104. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  105. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  106. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
  107. keras_hub/src/models/phi3/__init__.py +1 -2
  108. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  109. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  110. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  111. keras_hub/src/models/preprocessor.py +72 -83
  112. keras_hub/src/models/resnet/__init__.py +6 -0
  113. keras_hub/src/models/resnet/resnet_backbone.py +390 -42
  114. keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
  115. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  116. keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
  117. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  118. keras_hub/src/models/retinanet/__init__.py +13 -0
  119. keras_hub/src/models/retinanet/anchor_generator.py +175 -0
  120. keras_hub/src/models/retinanet/box_matcher.py +259 -0
  121. keras_hub/src/models/retinanet/non_max_supression.py +578 -0
  122. keras_hub/src/models/roberta/__init__.py +1 -2
  123. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  124. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
  125. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  126. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  127. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  128. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  129. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  130. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  131. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  133. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  134. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  135. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  136. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  137. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  138. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  139. keras_hub/src/models/t5/__init__.py +1 -2
  140. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  141. keras_hub/src/models/task.py +71 -116
  142. keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
  143. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  144. keras_hub/src/models/whisper/__init__.py +1 -2
  145. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  146. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  147. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  148. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  149. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  150. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  151. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
  152. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  154. keras_hub/src/tests/test_case.py +46 -0
  155. keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
  156. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  157. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
  158. keras_hub/src/tokenizers/tokenizer.py +67 -32
  159. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
  161. keras_hub/src/utils/imagenet/__init__.py +13 -0
  162. keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
  163. keras_hub/src/utils/keras_utils.py +0 -50
  164. keras_hub/src/utils/preset_utils.py +230 -68
  165. keras_hub/src/utils/tensor_utils.py +187 -69
  166. keras_hub/src/utils/timm/convert_resnet.py +19 -16
  167. keras_hub/src/utils/timm/preset_loader.py +66 -0
  168. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  169. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  170. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  171. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  172. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  173. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  174. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  175. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  176. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  177. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  178. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  179. keras_hub/src/version_utils.py +1 -1
  180. keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
  181. keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
  182. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
  183. keras_hub/src/models/bart/bart_preprocessor.py +0 -276
  184. keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
  185. keras_hub/src/models/electra/electra_preprocessor.py +0 -154
  186. keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
  187. keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
  188. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
  189. keras_hub/src/models/llama/llama_preprocessor.py +0 -189
  190. keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
  191. keras_hub/src/models/opt/opt_preprocessor.py +0 -188
  192. keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
  193. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  194. keras_hub/src/utils/timm/convert.py +0 -37
  195. keras_hub/src/utils/transformers/convert.py +0 -101
  196. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
  197. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  198. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
@@ -12,19 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import keras
16
- from absl import logging
17
-
18
15
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.models.phi3.phi3_preprocessor import Phi3Preprocessor
20
- from keras_hub.src.utils.keras_utils import (
21
- convert_inputs_to_list_of_tensor_segments,
22
- )
23
- from keras_hub.src.utils.tensor_utils import strip_to_ragged
16
+ from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
17
+ from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
18
+ from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer
24
19
 
25
20
 
26
21
  @keras_hub_export("keras_hub.models.Phi3CausalLMPreprocessor")
27
- class Phi3CausalLMPreprocessor(Phi3Preprocessor):
22
+ class Phi3CausalLMPreprocessor(CausalLMPreprocessor):
28
23
  """Phi3 Causal LM preprocessor.
29
24
 
30
25
  This preprocessing layer is meant for use with
@@ -91,83 +86,5 @@ class Phi3CausalLMPreprocessor(Phi3Preprocessor):
91
86
  ```
92
87
  """
93
88
 
94
- def call(
95
- self,
96
- x,
97
- y=None,
98
- sample_weight=None,
99
- sequence_length=None,
100
- ):
101
- if y is not None or sample_weight is not None:
102
- logging.warning(
103
- "`Phi3CausalLMPreprocessor` generates `y` and "
104
- "`sample_weight` based on your input data, but your data "
105
- "already contains `y` or `sample_weight`. Your `y` and "
106
- "`sample_weight` will be ignored."
107
- )
108
- sequence_length = sequence_length or self.sequence_length
109
-
110
- x = convert_inputs_to_list_of_tensor_segments(x)[0]
111
- x = self.tokenizer(x)
112
- # Pad with one extra token to account for the truncation below.
113
- token_ids, padding_mask = self.packer(
114
- x,
115
- sequence_length=sequence_length + 1,
116
- add_start_value=self.add_start_token,
117
- add_end_value=self.add_end_token,
118
- )
119
- # The last token does not have a next token, so we truncate it out.
120
- x = {
121
- "token_ids": token_ids[..., :-1],
122
- "padding_mask": padding_mask[..., :-1],
123
- }
124
- # Target `y` will be the next token.
125
- y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
126
- return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
127
-
128
- def generate_preprocess(
129
- self,
130
- x,
131
- sequence_length=None,
132
- ):
133
- """Convert strings to integer token input for generation.
134
-
135
- Similar to calling the layer for training, this method takes in strings
136
- or tensor strings, tokenizes and packs the input, and computes a padding
137
- mask masking all inputs not filled in with a padded value.
138
-
139
- Unlike calling the layer for training, this method does not compute
140
- labels and will never append a `tokenizer.end_token_id` to the end of
141
- the sequence (as generation is expected to continue at the end of the
142
- inputted prompt).
143
- """
144
- if not self.built:
145
- self.build(None)
146
-
147
- x = convert_inputs_to_list_of_tensor_segments(x)[0]
148
- x = self.tokenizer(x)
149
- token_ids, padding_mask = self.packer(
150
- x, sequence_length=sequence_length, add_end_value=False
151
- )
152
- return {
153
- "token_ids": token_ids,
154
- "padding_mask": padding_mask,
155
- }
156
-
157
- def generate_postprocess(
158
- self,
159
- x,
160
- ):
161
- """Convert integer token output to strings for generation.
162
-
163
- This method reverses `generate_preprocess()`, by first removing all
164
- padding and start/end tokens, and then converting the integer sequence
165
- back to a string.
166
- """
167
- token_ids, padding_mask = x["token_ids"], x["padding_mask"]
168
- ids_to_strip = (
169
- self.tokenizer.start_token_id,
170
- self.tokenizer.end_token_id,
171
- )
172
- token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
173
- return self.tokenizer.detokenize(token_ids)
89
+ backbone_cls = Phi3Backbone
90
+ tokenizer_cls = Phi3Tokenizer
@@ -11,17 +11,19 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- import copy
15
-
16
14
  from keras_hub.src.api_export import keras_hub_export
17
- from keras_hub.src.models.phi3.phi3_presets import backbone_presets
15
+ from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
18
16
  from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
19
17
  SentencePieceTokenizer,
20
18
  )
21
- from keras_hub.src.utils.python_utils import classproperty
22
19
 
23
20
 
24
- @keras_hub_export("keras_hub.models.Phi3Tokenizer")
21
+ @keras_hub_export(
22
+ [
23
+ "keras_hub.tokenizers.Phi3Tokenizer",
24
+ "keras_hub.models.Phi3Tokenizer",
25
+ ]
26
+ )
25
27
  class Phi3Tokenizer(SentencePieceTokenizer):
26
28
  """Phi3 tokenizer layer based on SentencePiece.
27
29
 
@@ -31,10 +33,6 @@ class Phi3Tokenizer(SentencePieceTokenizer):
31
33
  Phi3 models and provides a `from_preset()` method to automatically
32
34
  download a matching vocabulary for a Phi3 preset.
33
35
 
34
- This tokenizer does not provide truncation or padding of inputs. It can be
35
- combined with a `keras_hub.models.Phi3Preprocessor` layer for input
36
- packing.
37
-
38
36
  If input is a batch of strings (rank > 0), the layer will output a
39
37
  `tf.RaggedTensor` where the last dimension of the output is ragged.
40
38
 
@@ -63,32 +61,10 @@ class Phi3Tokenizer(SentencePieceTokenizer):
63
61
  ```
64
62
  """
65
63
 
64
+ backbone_cls = Phi3Backbone
65
+
66
66
  def __init__(self, proto, **kwargs):
67
- self.start_token = "<s>"
68
- self.end_token = "<|endoftext|>"
67
+ self._add_special_token("<s>", "start_token")
68
+ self._add_special_token("<|endoftext|>", "end_token")
69
+ self.pad_token_id = 0
69
70
  super().__init__(proto=proto, **kwargs)
70
-
71
- def set_proto(self, proto):
72
- super().set_proto(proto)
73
- if proto is not None:
74
- for token in [self.start_token, self.end_token]:
75
- if token not in self.get_vocabulary():
76
- raise ValueError(
77
- f"Cannot find token `'{token}'` in the provided "
78
- f"`vocabulary`. Please provide `'{token}'` in your "
79
- "`vocabulary` or use a pretrained `vocabulary` name."
80
- )
81
- self.start_token_id = self.token_to_id(self.start_token)
82
- self.end_token_id = self.token_to_id(self.end_token)
83
- # TODO: `pad_token` is `<|endoftext|>`, but setting it to `<unk>`
84
- # for now, because of the way sampler works. sampler will think that
85
- # `pad_token` is `end_token` and stop generation immediatly.
86
- self.pad_token_id = 0
87
- else:
88
- self.start_token_id = None
89
- self.end_token_id = None
90
- self.pad_token_id = None
91
-
92
- @classproperty
93
- def presets(cls):
94
- return copy.deepcopy(backbone_presets)
@@ -19,13 +19,9 @@ from keras_hub.src.layers.preprocessing.preprocessing_layer import (
19
19
  PreprocessingLayer,
20
20
  )
21
21
  from keras_hub.src.utils.preset_utils import PREPROCESSOR_CONFIG_FILE
22
- from keras_hub.src.utils.preset_utils import TOKENIZER_CONFIG_FILE
23
- from keras_hub.src.utils.preset_utils import check_config_class
24
- from keras_hub.src.utils.preset_utils import check_file_exists
25
- from keras_hub.src.utils.preset_utils import check_format
26
- from keras_hub.src.utils.preset_utils import list_presets
27
- from keras_hub.src.utils.preset_utils import list_subclasses
28
- from keras_hub.src.utils.preset_utils import load_serialized_object
22
+ from keras_hub.src.utils.preset_utils import builtin_presets
23
+ from keras_hub.src.utils.preset_utils import find_subclass
24
+ from keras_hub.src.utils.preset_utils import get_preset_loader
29
25
  from keras_hub.src.utils.preset_utils import save_serialized_object
30
26
  from keras_hub.src.utils.python_utils import classproperty
31
27
 
@@ -45,15 +41,20 @@ class Preprocessor(PreprocessingLayer):
45
41
  should set the `tokenizer` property on construction.
46
42
  """
47
43
 
44
+ backbone_cls = None
48
45
  tokenizer_cls = None
46
+ audio_converter_cls = None
47
+ image_converter_cls = None
49
48
 
50
49
  def __init__(self, *args, **kwargs):
51
50
  super().__init__(*args, **kwargs)
52
51
  self._tokenizer = None
52
+ self._image_converter = None
53
+ self._audio_converter = None
53
54
 
54
55
  def __setattr__(self, name, value):
55
56
  # Work around torch setattr for properties.
56
- if name in ["tokenizer"]:
57
+ if name in ["tokenizer", "audio_converter", "image_converter"]:
57
58
  return object.__setattr__(self, name, value)
58
59
  return super().__setattr__(name, value)
59
60
 
@@ -66,26 +67,60 @@ class Preprocessor(PreprocessingLayer):
66
67
  def tokenizer(self, value):
67
68
  self._tokenizer = value
68
69
 
70
+ @property
71
+ def audio_converter(self):
72
+ """The audio converter used to preprocess audio data."""
73
+ return self._audio_converter
74
+
75
+ @audio_converter.setter
76
+ def audio_converter(self, value):
77
+ self._audio_converter = value
78
+
79
+ @property
80
+ def image_converter(self):
81
+ """The image converter used to preprocess image data."""
82
+ return self._image_converter
83
+
84
+ @image_converter.setter
85
+ def image_converter(self, value):
86
+ self._image_converter = value
87
+
69
88
  def get_config(self):
70
89
  config = super().get_config()
71
- config["tokenizer"] = keras.layers.serialize(self.tokenizer)
90
+ if self.tokenizer:
91
+ config["tokenizer"] = keras.layers.serialize(self.tokenizer)
92
+ if self.audio_converter:
93
+ config["audio_converter"] = keras.layers.serialize(
94
+ self.audio_converter
95
+ )
96
+ if self.image_converter:
97
+ config["image_converter"] = keras.layers.serialize(
98
+ self.image_converter
99
+ )
72
100
  return config
73
101
 
74
102
  @classmethod
75
103
  def from_config(cls, config):
76
104
  if "tokenizer" in config and isinstance(config["tokenizer"], dict):
77
105
  config["tokenizer"] = keras.layers.deserialize(config["tokenizer"])
106
+ if "audio_converter" in config and isinstance(
107
+ config["audio_converter"], dict
108
+ ):
109
+ config["audio_converter"] = keras.layers.deserialize(
110
+ config["audio_converter"]
111
+ )
112
+ if "image_converter" in config and isinstance(
113
+ config["image_converter"], dict
114
+ ):
115
+ config["image_converter"] = keras.layers.deserialize(
116
+ config["image_converter"]
117
+ )
78
118
  return cls(**config)
79
119
 
80
120
  @classproperty
81
121
  def presets(cls):
82
- presets = list_presets(cls)
83
- # We can also load backbone presets.
84
- if cls.tokenizer_cls is not None:
85
- presets.update(cls.tokenizer_cls.presets)
86
- for subclass in list_subclasses(cls):
87
- presets.update(subclass.presets)
88
- return presets
122
+ """List built-in presets for a `Preprocessor` subclass."""
123
+ return builtin_presets(cls)
89
124
 
90
125
  @classmethod
91
126
  def from_preset(
@@ -96,10 +131,10 @@ class Preprocessor(PreprocessingLayer):
96
131
  """Instantiate a `keras_hub.models.Preprocessor` from a model preset.
97
132
 
98
133
  A preset is a directory of configs, weights and other file assets used
99
- to save and load a pre-trained model. The `preset` can be passed as a
134
+ to save and load a pre-trained model. The `preset` can be passed as
100
135
  one of:
101
136
 
102
- 1. a built in preset identifier like `'bert_base_en'`
137
+ 1. a built-in preset identifier like `'bert_base_en'`
103
138
  2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
104
139
  3. a Hugging Face handle like `'hf://user/bert_base_en'`
105
140
  4. a path to a local preset directory like `'./bert_base_en'`
@@ -109,10 +144,10 @@ class Preprocessor(PreprocessingLayer):
109
144
 
110
145
  As there are usually multiple preprocessing classes for a given model,
111
146
  this method should be called on a specific subclass like
112
- `keras_hub.models.BertPreprocessor.from_preset()`.
147
+ `keras_hub.models.BertTextClassifierPreprocessor.from_preset()`.
113
148
 
114
149
  Args:
115
- preset: string. A built in preset identifier, a Kaggle Models
150
+ preset: string. A built-in preset identifier, a Kaggle Models
116
151
  handle, a Hugging Face handle, or a path to a local directory.
117
152
 
118
153
  Examples:
@@ -123,75 +158,24 @@ class Preprocessor(PreprocessingLayer):
123
158
  )
124
159
 
125
160
  # Load a preprocessor for Bert classification.
126
- preprocessor = keras_hub.models.BertPreprocessor.from_preset(
161
+ preprocessor = keras_hub.models.BertTextClassifierPreprocessor.from_preset(
127
162
  "bert_base_en",
128
163
  )
129
164
  ```
130
165
  """
131
- format = check_format(preset)
132
-
133
- if format == "transformers":
134
- if cls.tokenizer_cls is None:
135
- raise ValueError("Tokenizer class is None")
136
- tokenizer = cls.tokenizer_cls.from_preset(preset)
137
- return cls(tokenizer=tokenizer, **kwargs)
138
-
139
166
  if cls == Preprocessor:
140
167
  raise ValueError(
141
- "Do not call `Preprocessor.from_preset()` directly. Instead call a "
142
- "choose a particular task class, e.g. "
143
- "`keras_hub.models.BertPreprocessor.from_preset()`."
144
- )
145
- # Check if we should load a `preprocessor.json` directly.
146
- load_preprocessor_config = False
147
- if check_file_exists(preset, PREPROCESSOR_CONFIG_FILE):
148
- preprocessor_preset_cls = check_config_class(
149
- preset, PREPROCESSOR_CONFIG_FILE
168
+ "Do not call `Preprocessor.from_preset()` directly. Instead "
169
+ "choose a particular task preprocessing class, e.g. "
170
+ "`keras_hub.models.TextClassifierPreprocessor.from_preset()`."
150
171
  )
151
- if issubclass(preprocessor_preset_cls, cls):
152
- load_preprocessor_config = True
153
- if load_preprocessor_config:
154
- # Preprocessor case.
155
- preprocessor = load_serialized_object(
156
- preset,
157
- PREPROCESSOR_CONFIG_FILE,
158
- )
159
- preprocessor.tokenizer.load_preset_assets(preset)
160
- return preprocessor
161
-
162
- # Tokenizer case.
163
- # If `preprocessor.json` doesn't exist or preprocessor preset class is
164
- # different from the calling class, create the preprocessor based on
165
- # `tokenizer.json`.
166
- tokenizer_preset_cls = check_config_class(
167
- preset, config_file=TOKENIZER_CONFIG_FILE
168
- )
169
- if tokenizer_preset_cls is not cls.tokenizer_cls:
170
- subclasses = list_subclasses(cls)
171
- subclasses = tuple(
172
- filter(
173
- lambda x: x.tokenizer_cls == tokenizer_preset_cls,
174
- subclasses,
175
- )
176
- )
177
- if len(subclasses) == 0:
178
- raise ValueError(
179
- f"No registered subclass of `{cls.__name__}` can load "
180
- f"a `{tokenizer_preset_cls.__name__}`."
181
- )
182
- if len(subclasses) > 1:
183
- names = ", ".join(f"`{x.__name__}`" for x in subclasses)
184
- raise ValueError(
185
- f"Ambiguous call to `{cls.__name__}.from_preset()`. "
186
- f"Found multiple possible subclasses {names}. "
187
- "Please call `from_preset` on a subclass directly."
188
- )
189
-
190
- tokenizer = load_serialized_object(preset, TOKENIZER_CONFIG_FILE)
191
- tokenizer.load_preset_assets(preset)
192
- preprocessor = cls(tokenizer=tokenizer, **kwargs)
193
-
194
- return preprocessor
172
+
173
+ loader = get_preset_loader(preset)
174
+ backbone_cls = loader.check_backbone_class()
175
+ # Detect the correct subclass if we need to.
176
+ if cls.backbone_cls != backbone_cls:
177
+ cls = find_subclass(preset, cls, backbone_cls)
178
+ return loader.load_preprocessor(cls, **kwargs)
195
179
 
196
180
  def save_to_preset(self, preset_dir):
197
181
  """Save preprocessor to a preset directory.
@@ -204,4 +188,9 @@ class Preprocessor(PreprocessingLayer):
204
188
  preset_dir,
205
189
  config_file=PREPROCESSOR_CONFIG_FILE,
206
190
  )
207
- self.tokenizer.save_to_preset(preset_dir)
191
+ if self.tokenizer:
192
+ self.tokenizer.save_to_preset(preset_dir)
193
+ if self.audio_converter:
194
+ self.audio_converter.save_to_preset(preset_dir)
195
+ if self.image_converter:
196
+ self.image_converter.save_to_preset(preset_dir)
@@ -11,3 +11,9 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
15
+ from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone
16
+ from keras_hub.src.models.resnet.resnet_presets import backbone_presets
17
+ from keras_hub.src.utils.preset_utils import register_presets
18
+
19
+ register_presets(backbone_presets, ResNetBackbone)