keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.15.0.dev20240911134614__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. keras_hub/api/__init__.py +1 -0
  2. keras_hub/api/bounding_box/__init__.py +36 -0
  3. keras_hub/api/layers/__init__.py +14 -0
  4. keras_hub/api/models/__init__.py +75 -31
  5. keras_hub/api/tokenizers/__init__.py +30 -0
  6. keras_hub/src/bounding_box/__init__.py +13 -0
  7. keras_hub/src/bounding_box/converters.py +529 -0
  8. keras_hub/src/bounding_box/formats.py +162 -0
  9. keras_hub/src/bounding_box/iou.py +263 -0
  10. keras_hub/src/bounding_box/to_dense.py +95 -0
  11. keras_hub/src/bounding_box/to_ragged.py +99 -0
  12. keras_hub/src/bounding_box/utils.py +194 -0
  13. keras_hub/src/bounding_box/validate_format.py +99 -0
  14. keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
  15. keras_hub/src/layers/preprocessing/image_converter.py +130 -0
  16. keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
  17. keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
  18. keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
  19. keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
  20. keras_hub/src/layers/preprocessing/random_swap.py +33 -31
  21. keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
  22. keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
  23. keras_hub/src/models/albert/__init__.py +1 -2
  24. keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
  25. keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +29 -10
  26. keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
  27. keras_hub/src/models/albert/albert_tokenizer.py +17 -36
  28. keras_hub/src/models/backbone.py +12 -34
  29. keras_hub/src/models/bart/__init__.py +1 -2
  30. keras_hub/src/models/bart/bart_preprocessor.py +6 -18
  31. keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
  32. keras_hub/src/models/bart/bart_tokenizer.py +12 -39
  33. keras_hub/src/models/bert/__init__.py +1 -5
  34. keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
  35. keras_hub/src/models/bert/bert_presets.py +1 -4
  36. keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +12 -10
  37. keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
  38. keras_hub/src/models/bert/bert_tokenizer.py +17 -35
  39. keras_hub/src/models/bloom/__init__.py +1 -2
  40. keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
  41. keras_hub/src/models/bloom/bloom_preprocessor.py +5 -12
  42. keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
  43. keras_hub/src/models/causal_lm.py +10 -29
  44. keras_hub/src/models/causal_lm_preprocessor.py +195 -0
  45. keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
  46. keras_hub/src/models/deberta_v3/__init__.py +1 -4
  47. keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
  48. keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +11 -11
  49. keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
  50. keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
  51. keras_hub/src/models/densenet/densenet_backbone.py +46 -22
  52. keras_hub/src/models/distil_bert/__init__.py +1 -4
  53. keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
  54. keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +12 -12
  55. keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
  56. keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
  57. keras_hub/src/models/efficientnet/__init__.py +13 -0
  58. keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
  59. keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
  60. keras_hub/src/models/efficientnet/mbconv.py +238 -0
  61. keras_hub/src/models/electra/__init__.py +1 -2
  62. keras_hub/src/models/electra/electra_preprocessor.py +6 -5
  63. keras_hub/src/models/electra/electra_tokenizer.py +17 -32
  64. keras_hub/src/models/f_net/__init__.py +1 -2
  65. keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
  66. keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +10 -8
  67. keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
  68. keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
  69. keras_hub/src/models/falcon/__init__.py +1 -2
  70. keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
  71. keras_hub/src/models/falcon/falcon_preprocessor.py +5 -12
  72. keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
  73. keras_hub/src/models/gemma/__init__.py +1 -2
  74. keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
  75. keras_hub/src/models/gemma/gemma_preprocessor.py +5 -12
  76. keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
  77. keras_hub/src/models/gpt2/__init__.py +1 -2
  78. keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
  79. keras_hub/src/models/gpt2/gpt2_preprocessor.py +5 -12
  80. keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
  81. keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
  82. keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +5 -12
  83. keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
  84. keras_hub/src/models/image_classifier.py +0 -5
  85. keras_hub/src/models/image_classifier_preprocessor.py +83 -0
  86. keras_hub/src/models/llama/__init__.py +1 -2
  87. keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
  88. keras_hub/src/models/llama/llama_preprocessor.py +5 -12
  89. keras_hub/src/models/llama/llama_tokenizer.py +12 -25
  90. keras_hub/src/models/llama3/__init__.py +1 -2
  91. keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
  92. keras_hub/src/models/llama3/llama3_preprocessor.py +2 -0
  93. keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
  94. keras_hub/src/models/masked_lm.py +0 -2
  95. keras_hub/src/models/masked_lm_preprocessor.py +156 -0
  96. keras_hub/src/models/mistral/__init__.py +1 -2
  97. keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
  98. keras_hub/src/models/mistral/mistral_preprocessor.py +5 -12
  99. keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
  100. keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
  101. keras_hub/src/models/mobilenet/__init__.py +13 -0
  102. keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
  103. keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
  104. keras_hub/src/models/opt/__init__.py +1 -2
  105. keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
  106. keras_hub/src/models/opt/opt_preprocessor.py +5 -12
  107. keras_hub/src/models/opt/opt_tokenizer.py +12 -41
  108. keras_hub/src/models/pali_gemma/__init__.py +1 -4
  109. keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
  110. keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
  111. keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
  112. keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +10 -2
  113. keras_hub/src/models/phi3/__init__.py +1 -2
  114. keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
  115. keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
  116. keras_hub/src/models/phi3/phi3_preprocessor.py +5 -12
  117. keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
  118. keras_hub/src/models/preprocessor.py +76 -83
  119. keras_hub/src/models/resnet/__init__.py +6 -0
  120. keras_hub/src/models/resnet/resnet_backbone.py +387 -26
  121. keras_hub/src/models/resnet/resnet_image_classifier.py +7 -3
  122. keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
  123. keras_hub/src/models/resnet/resnet_image_converter.py +23 -0
  124. keras_hub/src/models/resnet/resnet_presets.py +95 -0
  125. keras_hub/src/models/roberta/__init__.py +1 -2
  126. keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
  127. keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +11 -11
  128. keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
  129. keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
  130. keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
  131. keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
  132. keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
  133. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
  134. keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
  135. keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
  136. keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
  137. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
  138. keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
  139. keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
  140. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
  141. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
  142. keras_hub/src/models/t5/__init__.py +1 -2
  143. keras_hub/src/models/t5/t5_tokenizer.py +13 -23
  144. keras_hub/src/models/task.py +71 -116
  145. keras_hub/src/models/{classifier.py → text_classifier.py} +8 -13
  146. keras_hub/src/models/text_classifier_preprocessor.py +138 -0
  147. keras_hub/src/models/whisper/__init__.py +1 -2
  148. keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
  149. keras_hub/src/models/whisper/whisper_backbone.py +0 -3
  150. keras_hub/src/models/whisper/whisper_presets.py +10 -10
  151. keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
  152. keras_hub/src/models/xlm_roberta/__init__.py +1 -4
  153. keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
  154. keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +11 -11
  155. keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
  156. keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
  157. keras_hub/src/tests/test_case.py +25 -0
  158. keras_hub/src/tokenizers/byte_pair_tokenizer.py +29 -17
  159. keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
  160. keras_hub/src/tokenizers/sentence_piece_tokenizer.py +19 -7
  161. keras_hub/src/tokenizers/tokenizer.py +67 -32
  162. keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
  163. keras_hub/src/tokenizers/word_piece_tokenizer.py +33 -47
  164. keras_hub/src/utils/keras_utils.py +0 -50
  165. keras_hub/src/utils/preset_utils.py +238 -67
  166. keras_hub/src/utils/tensor_utils.py +187 -69
  167. keras_hub/src/utils/timm/convert_resnet.py +20 -16
  168. keras_hub/src/utils/timm/preset_loader.py +67 -0
  169. keras_hub/src/utils/transformers/convert_albert.py +193 -0
  170. keras_hub/src/utils/transformers/convert_bart.py +373 -0
  171. keras_hub/src/utils/transformers/convert_bert.py +7 -17
  172. keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
  173. keras_hub/src/utils/transformers/convert_gemma.py +5 -19
  174. keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
  175. keras_hub/src/utils/transformers/convert_llama3.py +7 -18
  176. keras_hub/src/utils/transformers/convert_mistral.py +129 -0
  177. keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
  178. keras_hub/src/utils/transformers/preset_loader.py +77 -0
  179. keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
  180. keras_hub/src/version_utils.py +1 -1
  181. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/METADATA +1 -2
  182. keras_hub_nightly-0.15.0.dev20240911134614.dist-info/RECORD +338 -0
  183. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/WHEEL +1 -1
  184. keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
  185. keras_hub/src/utils/timm/convert.py +0 -37
  186. keras_hub/src/utils/transformers/convert.py +0 -101
  187. keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
  188. {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,195 @@
1
+ # Copyright 2024 The KerasHub Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import keras
15
+
16
+ from keras_hub.src.api_export import keras_hub_export
17
+ from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
18
+ from keras_hub.src.models.preprocessor import Preprocessor
19
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
20
+ from keras_hub.src.utils.tensor_utils import strip_to_ragged
21
+
22
+
23
+ @keras_hub_export("keras_hub.models.CausalLMPreprocessor")
24
+ class CausalLMPreprocessor(Preprocessor):
25
+ """Base class for causal language modeling preprocessing layers.
26
+
27
+ `CausalLMPreprocessor` layers wrap a `keras_hub.tokenizers.Tokenizer` to
28
+ create a preprocessing layer for causal language modeling tasks. It is
29
+ intended to be paired with a `keras_hub.models.CausalLM` task.
30
+
31
+ All `CausalLMPreprocessor` layers take a single input. This can be a single
32
+ string or a batch of strings. See examples below. These inputs
33
+ will be tokenized and padded/truncated to a fixed sequence length.
34
+
35
+ This layer will always output a `(x, y, sample_weight)` tuple, where `x`
36
+ is a dictionary with the tokenized inputs, `y` contains the tokens from `x`
37
+ offset by 1, and `sample_weight` marks where `y` contains padded
38
+ values. The exact contents of `x` will vary depending on the model being
39
+ used.
40
+
41
+ A `CausalLMPreprocessor` contains two extra methods, `generate_preprocess`
42
+ and `generate_postprocess` for use with generation. See examples below.
43
+
44
+ All `CausalLMPreprocessor` tasks include a `from_preset()` constructor
45
+ which can be used to load a pre-trained config and vocabularies. You can
46
+ call the `from_preset()` constructor directly on this base class, in which
47
+ case the correct class for your model will be automatically instantiated.
48
+
49
+ Examples.
50
+ ```python
51
+ preprocessor = keras_hub.models.CausalLMPreprocessor.from_preset(
52
+ "gpt2_base_en",
53
+ sequence_length=256, # Optional.
54
+ )
55
+
56
+ # Tokenize and pack a single sentence.
57
+ x = "The quick brown fox jumped."
58
+ x, y, sample_weight = preprocessor(x)
59
+
60
+ # Tokenize and pad/truncate a batch of sentences.
61
+ x = ["The quick brown fox jumped.", "Call me Ishmael."]
62
+ x, y, sample_weight = preprocessor(x)
63
+
64
+ # With a `tf.data.Dataset`.
65
+ ds = tf.data.Dataset.from_tensor_slices(x)
66
+ ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
67
+
68
+ # Generate preprocess and postprocess.
69
+ x = preprocessor.generate_preprocess(x) # Tokenized numeric inputs.
70
+ x = preprocessor.generate_postprocess(x) # Detokenized string outputs.
71
+ ```
72
+ """
73
+
74
+ def __init__(
75
+ self,
76
+ tokenizer,
77
+ sequence_length=1024,
78
+ add_start_token=True,
79
+ add_end_token=True,
80
+ **kwargs,
81
+ ):
82
+ super().__init__(**kwargs)
83
+ self.tokenizer = tokenizer
84
+ self.packer = None
85
+ self.sequence_length = sequence_length
86
+ self.add_start_token = add_start_token
87
+ self.add_end_token = add_end_token
88
+
89
+ def build(self, input_shape):
90
+ # Defer packer creation to `build()` so that we can be sure tokenizer
91
+ # assets have loaded when restoring a saved model.
92
+ self.packer = StartEndPacker(
93
+ start_value=self.tokenizer.start_token_id,
94
+ end_value=self.tokenizer.end_token_id,
95
+ pad_value=self.tokenizer.pad_token_id,
96
+ sequence_length=self.sequence_length,
97
+ return_padding_mask=True,
98
+ )
99
+ self.built = True
100
+
101
+ @preprocessing_function
102
+ def call(
103
+ self,
104
+ x,
105
+ y=None,
106
+ sample_weight=None,
107
+ sequence_length=None,
108
+ ):
109
+ sequence_length = sequence_length or self.sequence_length
110
+ x = self.tokenizer(x)
111
+ # Pad with one extra token to account for the truncation below.
112
+ token_ids, padding_mask = self.packer(
113
+ x,
114
+ sequence_length=sequence_length + 1,
115
+ add_start_value=self.add_start_token,
116
+ add_end_value=self.add_end_token,
117
+ )
118
+ # The last token does not have a next token, so we truncate it out.
119
+ x = {
120
+ "token_ids": token_ids[..., :-1],
121
+ "padding_mask": padding_mask[..., :-1],
122
+ }
123
+ # Target `y` will be the next token.
124
+ y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
125
+ return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
126
+
127
+ @preprocessing_function
128
+ def generate_preprocess(
129
+ self,
130
+ x,
131
+ sequence_length=None,
132
+ ):
133
+ """Convert strings to integer token input for generation.
134
+
135
+ Similar to calling the layer for training, this method takes in strings
136
+ or tensor strings, tokenizes and packs the input, and computes a padding
137
+ mask masking all inputs not filled in with a padded value.
138
+
139
+ Unlike calling the layer for training, this method does not compute
140
+ labels and will never append a `tokenizer.end_token_id` to the end of
141
+ the sequence (as generation is expected to continue at the end of the
142
+ inputted prompt).
143
+ """
144
+ if not self.built:
145
+ self.build(None)
146
+
147
+ x = self.tokenizer(x)
148
+ token_ids, padding_mask = self.packer(
149
+ x, sequence_length=sequence_length, add_end_value=False
150
+ )
151
+ return {
152
+ "token_ids": token_ids,
153
+ "padding_mask": padding_mask,
154
+ }
155
+
156
+ @preprocessing_function
157
+ def generate_postprocess(
158
+ self,
159
+ x,
160
+ ):
161
+ """Convert integer token output to strings for generation.
162
+
163
+ This method reverses `generate_preprocess()`, by first removing all
164
+ padding and start/end tokens, and then converting the integer sequence
165
+ back to a string.
166
+ """
167
+ if not self.built:
168
+ self.build(None)
169
+
170
+ token_ids, padding_mask = x["token_ids"], x["padding_mask"]
171
+ ids_to_strip = self.tokenizer.special_token_ids
172
+ token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
173
+ return self.tokenizer.detokenize(token_ids)
174
+
175
+ def get_config(self):
176
+ config = super().get_config()
177
+ config.update(
178
+ {
179
+ "sequence_length": self.sequence_length,
180
+ "add_start_token": self.add_start_token,
181
+ "add_end_token": self.add_end_token,
182
+ }
183
+ )
184
+ return config
185
+
186
+ @property
187
+ def sequence_length(self):
188
+ """The padded length of model input sequences."""
189
+ return self._sequence_length
190
+
191
+ @sequence_length.setter
192
+ def sequence_length(self, value):
193
+ self._sequence_length = value
194
+ if self.packer is not None:
195
+ self.packer.sequence_length = value
@@ -15,11 +15,11 @@ import keras
15
15
  from keras import layers
16
16
 
17
17
  from keras_hub.src.api_export import keras_hub_export
18
- from keras_hub.src.models.backbone import Backbone
18
+ from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
19
19
 
20
20
 
21
21
  @keras_hub_export("keras_hub.models.CSPDarkNetBackbone")
22
- class CSPDarkNetBackbone(Backbone):
22
+ class CSPDarkNetBackbone(FeaturePyramidBackbone):
23
23
  """This class represents Keras Backbone of CSPDarkNet model.
24
24
 
25
25
  This class implements a CSPDarkNet backbone as described in
@@ -65,12 +65,15 @@ class CSPDarkNetBackbone(Backbone):
65
65
  self,
66
66
  stackwise_num_filters,
67
67
  stackwise_depth,
68
- include_rescaling,
68
+ include_rescaling=True,
69
69
  block_type="basic_block",
70
- image_shape=(224, 224, 3),
70
+ image_shape=(None, None, 3),
71
71
  **kwargs,
72
72
  ):
73
73
  # === Functional Model ===
74
+ channel_axis = (
75
+ -1 if keras.config.image_data_format() == "channels_last" else 1
76
+ )
74
77
  apply_ConvBlock = (
75
78
  apply_darknet_conv_block_depthwise
76
79
  if block_type == "depthwise_block"
@@ -83,15 +86,22 @@ class CSPDarkNetBackbone(Backbone):
83
86
  if include_rescaling:
84
87
  x = layers.Rescaling(scale=1 / 255.0)(x)
85
88
 
86
- x = apply_focus(name="stem_focus")(x)
89
+ x = apply_focus(channel_axis, name="stem_focus")(x)
87
90
  x = apply_darknet_conv_block(
88
- base_channels, kernel_size=3, strides=1, name="stem_conv"
91
+ base_channels,
92
+ channel_axis,
93
+ kernel_size=3,
94
+ strides=1,
95
+ name="stem_conv",
89
96
  )(x)
97
+
98
+ pyramid_outputs = {}
90
99
  for index, (channels, depth) in enumerate(
91
100
  zip(stackwise_num_filters, stackwise_depth)
92
101
  ):
93
102
  x = apply_ConvBlock(
94
103
  channels,
104
+ channel_axis,
95
105
  kernel_size=3,
96
106
  strides=2,
97
107
  name=f"dark{index + 2}_conv",
@@ -100,17 +110,20 @@ class CSPDarkNetBackbone(Backbone):
100
110
  if index == len(stackwise_depth) - 1:
101
111
  x = apply_spatial_pyramid_pooling_bottleneck(
102
112
  channels,
113
+ channel_axis,
103
114
  hidden_filters=channels // 2,
104
115
  name=f"dark{index + 2}_spp",
105
116
  )(x)
106
117
 
107
118
  x = apply_cross_stage_partial(
108
119
  channels,
120
+ channel_axis,
109
121
  num_bottlenecks=depth,
110
122
  block_type="basic_block",
111
123
  residual=(index != len(stackwise_depth) - 1),
112
124
  name=f"dark{index + 2}_csp",
113
125
  )(x)
126
+ pyramid_outputs[f"P{index + 2}"] = x
114
127
 
115
128
  super().__init__(inputs=image_input, outputs=x, **kwargs)
116
129
 
@@ -120,6 +133,7 @@ class CSPDarkNetBackbone(Backbone):
120
133
  self.include_rescaling = include_rescaling
121
134
  self.block_type = block_type
122
135
  self.image_shape = image_shape
136
+ self.pyramid_outputs = pyramid_outputs
123
137
 
124
138
  def get_config(self):
125
139
  config = super().get_config()
@@ -135,7 +149,7 @@ class CSPDarkNetBackbone(Backbone):
135
149
  return config
136
150
 
137
151
 
138
- def apply_focus(name=None):
152
+ def apply_focus(channel_axis, name=None):
139
153
  """A block used in CSPDarknet to focus information into channels of the
140
154
  image.
141
155
 
@@ -151,7 +165,7 @@ def apply_focus(name=None):
151
165
  """
152
166
 
153
167
  def apply(x):
154
- return layers.Concatenate(name=name)(
168
+ return layers.Concatenate(axis=channel_axis, name=name)(
155
169
  [
156
170
  x[..., ::2, ::2, :],
157
171
  x[..., 1::2, ::2, :],
@@ -164,7 +178,13 @@ def apply_focus(name=None):
164
178
 
165
179
 
166
180
  def apply_darknet_conv_block(
167
- filters, kernel_size, strides, use_bias=False, activation="silu", name=None
181
+ filters,
182
+ channel_axis,
183
+ kernel_size,
184
+ strides,
185
+ use_bias=False,
186
+ activation="silu",
187
+ name=None,
168
188
  ):
169
189
  """
170
190
  The basic conv block used in Darknet. Applies Conv2D followed by a
@@ -193,11 +213,12 @@ def apply_darknet_conv_block(
193
213
  kernel_size,
194
214
  strides,
195
215
  padding="same",
216
+ data_format=keras.config.image_data_format(),
196
217
  use_bias=use_bias,
197
218
  name=name + "_conv",
198
219
  )(inputs)
199
220
 
200
- x = layers.BatchNormalization(name=name + "_bn")(x)
221
+ x = layers.BatchNormalization(axis=channel_axis, name=name + "_bn")(x)
201
222
 
202
223
  if activation == "silu":
203
224
  x = layers.Lambda(lambda x: keras.activations.silu(x))(x)
@@ -212,7 +233,7 @@ def apply_darknet_conv_block(
212
233
 
213
234
 
214
235
  def apply_darknet_conv_block_depthwise(
215
- filters, kernel_size, strides, activation="silu", name=None
236
+ filters, channel_axis, kernel_size, strides, activation="silu", name=None
216
237
  ):
217
238
  """
218
239
  The depthwise conv block used in CSPDarknet.
@@ -236,9 +257,13 @@ def apply_darknet_conv_block_depthwise(
236
257
 
237
258
  def apply(inputs):
238
259
  x = layers.DepthwiseConv2D(
239
- kernel_size, strides, padding="same", use_bias=False
260
+ kernel_size,
261
+ strides,
262
+ padding="same",
263
+ data_format=keras.config.image_data_format(),
264
+ use_bias=False,
240
265
  )(inputs)
241
- x = layers.BatchNormalization()(x)
266
+ x = layers.BatchNormalization(axis=channel_axis)(x)
242
267
 
243
268
  if activation == "silu":
244
269
  x = layers.Lambda(lambda x: keras.activations.swish(x))(x)
@@ -248,7 +273,11 @@ def apply_darknet_conv_block_depthwise(
248
273
  x = layers.LeakyReLU(0.1)(x)
249
274
 
250
275
  x = apply_darknet_conv_block(
251
- filters, kernel_size=1, strides=1, activation=activation
276
+ filters,
277
+ channel_axis,
278
+ kernel_size=1,
279
+ strides=1,
280
+ activation=activation,
252
281
  )(x)
253
282
 
254
283
  return x
@@ -258,6 +287,7 @@ def apply_darknet_conv_block_depthwise(
258
287
 
259
288
  def apply_spatial_pyramid_pooling_bottleneck(
260
289
  filters,
290
+ channel_axis,
261
291
  hidden_filters=None,
262
292
  kernel_sizes=(5, 9, 13),
263
293
  activation="silu",
@@ -291,6 +321,7 @@ def apply_spatial_pyramid_pooling_bottleneck(
291
321
  def apply(x):
292
322
  x = apply_darknet_conv_block(
293
323
  hidden_filters,
324
+ channel_axis,
294
325
  kernel_size=1,
295
326
  strides=1,
296
327
  activation=activation,
@@ -304,13 +335,15 @@ def apply_spatial_pyramid_pooling_bottleneck(
304
335
  kernel_size,
305
336
  strides=1,
306
337
  padding="same",
338
+ data_format=keras.config.image_data_format(),
307
339
  name=f"{name}_maxpool_{kernel_size}",
308
340
  )(x[0])
309
341
  )
310
342
 
311
- x = layers.Concatenate(name=f"{name}_concat")(x)
343
+ x = layers.Concatenate(axis=channel_axis, name=f"{name}_concat")(x)
312
344
  x = apply_darknet_conv_block(
313
345
  filters,
346
+ channel_axis,
314
347
  kernel_size=1,
315
348
  strides=1,
316
349
  activation=activation,
@@ -324,6 +357,7 @@ def apply_spatial_pyramid_pooling_bottleneck(
324
357
 
325
358
  def apply_cross_stage_partial(
326
359
  filters,
360
+ channel_axis,
327
361
  num_bottlenecks,
328
362
  residual=True,
329
363
  block_type="basic_block",
@@ -361,6 +395,7 @@ def apply_cross_stage_partial(
361
395
 
362
396
  x1 = apply_darknet_conv_block(
363
397
  hidden_channels,
398
+ channel_axis,
364
399
  kernel_size=1,
365
400
  strides=1,
366
401
  activation=activation,
@@ -369,6 +404,7 @@ def apply_cross_stage_partial(
369
404
 
370
405
  x2 = apply_darknet_conv_block(
371
406
  hidden_channels,
407
+ channel_axis,
372
408
  kernel_size=1,
373
409
  strides=1,
374
410
  activation=activation,
@@ -379,6 +415,7 @@ def apply_cross_stage_partial(
379
415
  residual_x = x1
380
416
  x1 = apply_darknet_conv_block(
381
417
  hidden_channels,
418
+ channel_axis,
382
419
  kernel_size=1,
383
420
  strides=1,
384
421
  activation=activation,
@@ -386,6 +423,7 @@ def apply_cross_stage_partial(
386
423
  )(x1)
387
424
  x1 = ConvBlock(
388
425
  hidden_channels,
426
+ channel_axis,
389
427
  kernel_size=3,
390
428
  strides=1,
391
429
  activation=activation,
@@ -399,6 +437,7 @@ def apply_cross_stage_partial(
399
437
  x = layers.Concatenate(name=f"{name}_concat")([x1, x2])
400
438
  x = apply_darknet_conv_block(
401
439
  filters,
440
+ channel_axis,
402
441
  kernel_size=1,
403
442
  strides=1,
404
443
  activation=activation,
@@ -16,9 +16,6 @@ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
16
16
  DebertaV3Backbone,
17
17
  )
18
18
  from keras_hub.src.models.deberta_v3.deberta_v3_presets import backbone_presets
19
- from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import (
20
- DebertaV3Tokenizer,
21
- )
22
19
  from keras_hub.src.utils.preset_utils import register_presets
23
20
 
24
- register_presets(backbone_presets, (DebertaV3Backbone, DebertaV3Tokenizer))
21
+ register_presets(backbone_presets, DebertaV3Backbone)
@@ -13,19 +13,20 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import keras
16
- from absl import logging
17
16
 
18
17
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.layers.preprocessing.masked_lm_mask_generator import (
20
- MaskedLMMaskGenerator,
18
+ from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
19
+ DebertaV3Backbone,
21
20
  )
22
- from keras_hub.src.models.deberta_v3.deberta_v3_preprocessor import (
23
- DebertaV3Preprocessor,
21
+ from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import (
22
+ DebertaV3Tokenizer,
24
23
  )
24
+ from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
25
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
25
26
 
26
27
 
27
28
  @keras_hub_export("keras_hub.models.DebertaV3MaskedLMPreprocessor")
28
- class DebertaV3MaskedLMPreprocessor(DebertaV3Preprocessor):
29
+ class DebertaV3MaskedLMPreprocessor(MaskedLMPreprocessor):
29
30
  """DeBERTa preprocessing for the masked language modeling task.
30
31
 
31
32
  This preprocessing layer will prepare inputs for a masked language modeling
@@ -115,77 +116,13 @@ class DebertaV3MaskedLMPreprocessor(DebertaV3Preprocessor):
115
116
  ```
116
117
  """
117
118
 
118
- def __init__(
119
- self,
120
- tokenizer,
121
- sequence_length=512,
122
- truncate="round_robin",
123
- mask_selection_rate=0.15,
124
- mask_selection_length=96,
125
- mask_token_rate=0.8,
126
- random_token_rate=0.1,
127
- **kwargs,
128
- ):
129
- super().__init__(
130
- tokenizer,
131
- sequence_length=sequence_length,
132
- truncate=truncate,
133
- **kwargs,
134
- )
135
-
136
- self.mask_selection_rate = mask_selection_rate
137
- self.mask_selection_length = mask_selection_length
138
- self.mask_token_rate = mask_token_rate
139
- self.random_token_rate = random_token_rate
140
- self.masker = None
141
-
142
- def build(self, input_shape):
143
- super().build(input_shape)
144
- # Defer masker creation to `build()` so that we can be sure tokenizer
145
- # assets have loaded when restoring a saved model.
146
- self.masker = MaskedLMMaskGenerator(
147
- mask_selection_rate=self.mask_selection_rate,
148
- mask_selection_length=self.mask_selection_length,
149
- mask_token_rate=self.mask_token_rate,
150
- random_token_rate=self.random_token_rate,
151
- vocabulary_size=self.tokenizer.vocabulary_size(),
152
- mask_token_id=self.tokenizer.mask_token_id,
153
- unselectable_token_ids=[
154
- self.tokenizer.cls_token_id,
155
- self.tokenizer.sep_token_id,
156
- self.tokenizer.pad_token_id,
157
- ],
158
- )
159
-
160
- def get_config(self):
161
- config = super().get_config()
162
- config.update(
163
- {
164
- "mask_selection_rate": self.mask_selection_rate,
165
- "mask_selection_length": self.mask_selection_length,
166
- "mask_token_rate": self.mask_token_rate,
167
- "random_token_rate": self.random_token_rate,
168
- }
169
- )
170
- return config
119
+ backbone_cls = DebertaV3Backbone
120
+ tokenizer_cls = DebertaV3Tokenizer
171
121
 
122
+ @preprocessing_function
172
123
  def call(self, x, y=None, sample_weight=None):
173
- if y is not None or sample_weight is not None:
174
- logging.warning(
175
- f"{self.__class__.__name__} generates `y` and `sample_weight` "
176
- "based on your input data, but your data already contains `y` "
177
- "or `sample_weight`. Your `y` and `sample_weight` will be "
178
- "ignored."
179
- )
180
-
181
- x = super().call(x)
182
- token_ids, padding_mask = x["token_ids"], x["padding_mask"]
183
- masker_outputs = self.masker(token_ids)
184
- x = {
185
- "token_ids": masker_outputs["token_ids"],
186
- "padding_mask": padding_mask,
187
- "mask_positions": masker_outputs["mask_positions"],
188
- }
189
- y = masker_outputs["mask_ids"]
190
- sample_weight = masker_outputs["mask_weights"]
124
+ output = super().call(x, y=y, sample_weight=sample_weight)
125
+ x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(output)
126
+ # Backbone has no segment ID input.
127
+ del x["segment_ids"]
191
128
  return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
@@ -16,20 +16,20 @@
16
16
  import keras
17
17
 
18
18
  from keras_hub.src.api_export import keras_hub_export
19
- from keras_hub.src.models.classifier import Classifier
20
19
  from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
21
20
  DebertaV3Backbone,
22
21
  )
23
22
  from keras_hub.src.models.deberta_v3.deberta_v3_backbone import (
24
23
  deberta_kernel_initializer,
25
24
  )
26
- from keras_hub.src.models.deberta_v3.deberta_v3_preprocessor import (
27
- DebertaV3Preprocessor,
25
+ from keras_hub.src.models.deberta_v3.deberta_v3_text_classifier_preprocessor import (
26
+ DebertaV3TextClassifierPreprocessor,
28
27
  )
28
+ from keras_hub.src.models.text_classifier import TextClassifier
29
29
 
30
30
 
31
- @keras_hub_export("keras_hub.models.DebertaV3Classifier")
32
- class DebertaV3Classifier(Classifier):
31
+ @keras_hub_export("keras_hub.models.DebertaV3TextClassifier")
32
+ class DebertaV3TextClassifier(TextClassifier):
33
33
  """An end-to-end DeBERTa model for classification tasks.
34
34
 
35
35
  This model attaches a classification head to a
@@ -53,7 +53,7 @@ class DebertaV3Classifier(Classifier):
53
53
  Args:
54
54
  backbone: A `keras_hub.models.DebertaV3` instance.
55
55
  num_classes: int. Number of classes to predict.
56
- preprocessor: A `keras_hub.models.DebertaV3Preprocessor` or `None`. If
56
+ preprocessor: A `keras_hub.models.DebertaV3TextClassifierPreprocessor` or `None`. If
57
57
  `None`, this model will not apply preprocessing, and inputs should
58
58
  be preprocessed before calling the model.
59
59
  activation: Optional `str` or callable. The
@@ -72,7 +72,7 @@ class DebertaV3Classifier(Classifier):
72
72
  labels = [0, 3]
73
73
 
74
74
  # Pretrained classifier.
75
- classifier = keras_hub.models.DebertaV3Classifier.from_preset(
75
+ classifier = keras_hub.models.DebertaV3TextClassifier.from_preset(
76
76
  "deberta_v3_base_en",
77
77
  num_classes=4,
78
78
  )
@@ -100,7 +100,7 @@ class DebertaV3Classifier(Classifier):
100
100
  labels = [0, 3]
101
101
 
102
102
  # Pretrained classifier without preprocessing.
103
- classifier = keras_hub.models.DebertaV3Classifier.from_preset(
103
+ classifier = keras_hub.models.DebertaV3TextClassifier.from_preset(
104
104
  "deberta_v3_base_en",
105
105
  num_classes=4,
106
106
  preprocessor=None,
@@ -132,7 +132,7 @@ class DebertaV3Classifier(Classifier):
132
132
  tokenizer = keras_hub.models.DebertaV3Tokenizer(
133
133
  proto=bytes_io.getvalue(),
134
134
  )
135
- preprocessor = keras_hub.models.DebertaV3Preprocessor(
135
+ preprocessor = keras_hub.models.DebertaV3TextClassifierPreprocessor(
136
136
  tokenizer=tokenizer,
137
137
  sequence_length=128,
138
138
  )
@@ -144,7 +144,7 @@ class DebertaV3Classifier(Classifier):
144
144
  intermediate_dim=512,
145
145
  max_sequence_length=128,
146
146
  )
147
- classifier = keras_hub.models.DebertaV3Classifier(
147
+ classifier = keras_hub.models.DebertaV3TextClassifier(
148
148
  backbone=backbone,
149
149
  preprocessor=preprocessor,
150
150
  num_classes=4,
@@ -154,7 +154,7 @@ class DebertaV3Classifier(Classifier):
154
154
  """
155
155
 
156
156
  backbone_cls = DebertaV3Backbone
157
- preprocessor_cls = DebertaV3Preprocessor
157
+ preprocessor_cls = DebertaV3TextClassifierPreprocessor
158
158
 
159
159
  def __init__(
160
160
  self,