keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.16.0.dev2024092017__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/__init__.py +0 -6
- keras_hub/api/__init__.py +2 -0
- keras_hub/api/bounding_box/__init__.py +36 -0
- keras_hub/api/layers/__init__.py +14 -0
- keras_hub/api/models/__init__.py +97 -48
- keras_hub/api/tokenizers/__init__.py +30 -0
- keras_hub/api/utils/__init__.py +22 -0
- keras_hub/src/api_export.py +15 -9
- keras_hub/src/bounding_box/__init__.py +13 -0
- keras_hub/src/bounding_box/converters.py +529 -0
- keras_hub/src/bounding_box/formats.py +162 -0
- keras_hub/src/bounding_box/iou.py +263 -0
- keras_hub/src/bounding_box/to_dense.py +95 -0
- keras_hub/src/bounding_box/to_ragged.py +99 -0
- keras_hub/src/bounding_box/utils.py +194 -0
- keras_hub/src/bounding_box/validate_format.py +99 -0
- keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
- keras_hub/src/layers/preprocessing/image_converter.py +130 -0
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
- keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
- keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
- keras_hub/src/layers/preprocessing/random_swap.py +33 -31
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
- keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
- keras_hub/src/models/albert/__init__.py +1 -2
- keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
- keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +34 -10
- keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/albert/albert_tokenizer.py +17 -36
- keras_hub/src/models/backbone.py +12 -34
- keras_hub/src/models/bart/__init__.py +1 -2
- keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
- keras_hub/src/models/bart/bart_tokenizer.py +12 -39
- keras_hub/src/models/bert/__init__.py +1 -5
- keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
- keras_hub/src/models/bert/bert_presets.py +1 -4
- keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +19 -12
- keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/bert/bert_tokenizer.py +17 -35
- keras_hub/src/models/bloom/__init__.py +1 -2
- keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
- keras_hub/src/models/causal_lm.py +10 -29
- keras_hub/src/models/causal_lm_preprocessor.py +195 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
- keras_hub/src/models/deberta_v3/__init__.py +1 -4
- keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
- keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +16 -11
- keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
- keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
- keras_hub/src/models/densenet/densenet_backbone.py +46 -22
- keras_hub/src/models/distil_bert/__init__.py +1 -4
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
- keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +17 -12
- keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
- keras_hub/src/models/efficientnet/__init__.py +13 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
- keras_hub/src/models/efficientnet/mbconv.py +238 -0
- keras_hub/src/models/electra/__init__.py +1 -2
- keras_hub/src/models/electra/electra_tokenizer.py +17 -32
- keras_hub/src/models/f_net/__init__.py +1 -2
- keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
- keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +17 -10
- keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
- keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
- keras_hub/src/models/falcon/__init__.py +1 -2
- keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
- keras_hub/src/models/gemma/__init__.py +1 -2
- keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
- keras_hub/src/models/gemma/gemma_decoder_block.py +1 -1
- keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
- keras_hub/src/models/gpt2/__init__.py +1 -2
- keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/gpt2/gpt2_preprocessor.py +12 -90
- keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
- keras_hub/src/models/image_classifier.py +0 -5
- keras_hub/src/models/image_classifier_preprocessor.py +83 -0
- keras_hub/src/models/llama/__init__.py +1 -2
- keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
- keras_hub/src/models/llama/llama_tokenizer.py +12 -25
- keras_hub/src/models/llama3/__init__.py +1 -2
- keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
- keras_hub/src/models/masked_lm.py +0 -2
- keras_hub/src/models/masked_lm_preprocessor.py +156 -0
- keras_hub/src/models/mistral/__init__.py +1 -2
- keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
- keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
- keras_hub/src/models/mobilenet/__init__.py +13 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
- keras_hub/src/models/opt/__init__.py +1 -2
- keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
- keras_hub/src/models/opt/opt_tokenizer.py +12 -41
- keras_hub/src/models/pali_gemma/__init__.py +1 -4
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
- keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +11 -3
- keras_hub/src/models/phi3/__init__.py +1 -2
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
- keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
- keras_hub/src/models/preprocessor.py +72 -83
- keras_hub/src/models/resnet/__init__.py +6 -0
- keras_hub/src/models/resnet/resnet_backbone.py +390 -42
- keras_hub/src/models/resnet/resnet_image_classifier.py +33 -6
- keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
- keras_hub/src/models/{llama3/llama3_preprocessor.py → resnet/resnet_image_converter.py} +7 -5
- keras_hub/src/models/resnet/resnet_presets.py +95 -0
- keras_hub/src/models/retinanet/__init__.py +13 -0
- keras_hub/src/models/retinanet/anchor_generator.py +175 -0
- keras_hub/src/models/retinanet/box_matcher.py +259 -0
- keras_hub/src/models/retinanet/non_max_supression.py +578 -0
- keras_hub/src/models/roberta/__init__.py +1 -2
- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
- keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +16 -11
- keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
- keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
- keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
- keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
- keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
- keras_hub/src/models/t5/__init__.py +1 -2
- keras_hub/src/models/t5/t5_tokenizer.py +13 -23
- keras_hub/src/models/task.py +71 -116
- keras_hub/src/models/{classifier.py → text_classifier.py} +19 -13
- keras_hub/src/models/text_classifier_preprocessor.py +138 -0
- keras_hub/src/models/whisper/__init__.py +1 -2
- keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
- keras_hub/src/models/whisper/whisper_backbone.py +0 -3
- keras_hub/src/models/whisper/whisper_presets.py +10 -10
- keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
- keras_hub/src/models/xlm_roberta/__init__.py +1 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
- keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +16 -11
- keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
- keras_hub/src/tests/test_case.py +46 -0
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +30 -17
- keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +20 -7
- keras_hub/src/tokenizers/tokenizer.py +67 -32
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
- keras_hub/src/tokenizers/word_piece_tokenizer.py +34 -47
- keras_hub/src/utils/imagenet/__init__.py +13 -0
- keras_hub/src/utils/imagenet/imagenet_utils.py +1067 -0
- keras_hub/src/utils/keras_utils.py +0 -50
- keras_hub/src/utils/preset_utils.py +230 -68
- keras_hub/src/utils/tensor_utils.py +187 -69
- keras_hub/src/utils/timm/convert_resnet.py +19 -16
- keras_hub/src/utils/timm/preset_loader.py +66 -0
- keras_hub/src/utils/transformers/convert_albert.py +193 -0
- keras_hub/src/utils/transformers/convert_bart.py +373 -0
- keras_hub/src/utils/transformers/convert_bert.py +7 -17
- keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
- keras_hub/src/utils/transformers/convert_gemma.py +5 -19
- keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
- keras_hub/src/utils/transformers/convert_llama3.py +7 -18
- keras_hub/src/utils/transformers/convert_mistral.py +129 -0
- keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
- keras_hub/src/utils/transformers/preset_loader.py +77 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
- keras_hub/src/version_utils.py +1 -1
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/METADATA +202 -0
- keras_hub_nightly-0.16.0.dev2024092017.dist-info/RECORD +334 -0
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/WHEEL +1 -1
- keras_hub/src/models/bart/bart_preprocessor.py +0 -276
- keras_hub/src/models/bloom/bloom_preprocessor.py +0 -185
- keras_hub/src/models/electra/electra_preprocessor.py +0 -154
- keras_hub/src/models/falcon/falcon_preprocessor.py +0 -187
- keras_hub/src/models/gemma/gemma_preprocessor.py +0 -191
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +0 -145
- keras_hub/src/models/llama/llama_preprocessor.py +0 -189
- keras_hub/src/models/mistral/mistral_preprocessor.py +0 -190
- keras_hub/src/models/opt/opt_preprocessor.py +0 -188
- keras_hub/src/models/phi3/phi3_preprocessor.py +0 -190
- keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
- keras_hub/src/utils/timm/convert.py +0 -37
- keras_hub/src/utils/transformers/convert.py +0 -101
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/METADATA +0 -34
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.16.0.dev2024092017.dist-info}/top_level.txt +0 -0
@@ -12,19 +12,14 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
import keras
|
16
|
-
from absl import logging
|
17
|
-
|
18
15
|
from keras_hub.src.api_export import keras_hub_export
|
19
|
-
from keras_hub.src.models.
|
20
|
-
from keras_hub.src.
|
21
|
-
|
22
|
-
)
|
23
|
-
from keras_hub.src.utils.tensor_utils import strip_to_ragged
|
16
|
+
from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
|
17
|
+
from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
|
18
|
+
from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer
|
24
19
|
|
25
20
|
|
26
21
|
@keras_hub_export("keras_hub.models.Phi3CausalLMPreprocessor")
|
27
|
-
class Phi3CausalLMPreprocessor(
|
22
|
+
class Phi3CausalLMPreprocessor(CausalLMPreprocessor):
|
28
23
|
"""Phi3 Causal LM preprocessor.
|
29
24
|
|
30
25
|
This preprocessing layer is meant for use with
|
@@ -91,83 +86,5 @@ class Phi3CausalLMPreprocessor(Phi3Preprocessor):
|
|
91
86
|
```
|
92
87
|
"""
|
93
88
|
|
94
|
-
|
95
|
-
|
96
|
-
x,
|
97
|
-
y=None,
|
98
|
-
sample_weight=None,
|
99
|
-
sequence_length=None,
|
100
|
-
):
|
101
|
-
if y is not None or sample_weight is not None:
|
102
|
-
logging.warning(
|
103
|
-
"`Phi3CausalLMPreprocessor` generates `y` and "
|
104
|
-
"`sample_weight` based on your input data, but your data "
|
105
|
-
"already contains `y` or `sample_weight`. Your `y` and "
|
106
|
-
"`sample_weight` will be ignored."
|
107
|
-
)
|
108
|
-
sequence_length = sequence_length or self.sequence_length
|
109
|
-
|
110
|
-
x = convert_inputs_to_list_of_tensor_segments(x)[0]
|
111
|
-
x = self.tokenizer(x)
|
112
|
-
# Pad with one extra token to account for the truncation below.
|
113
|
-
token_ids, padding_mask = self.packer(
|
114
|
-
x,
|
115
|
-
sequence_length=sequence_length + 1,
|
116
|
-
add_start_value=self.add_start_token,
|
117
|
-
add_end_value=self.add_end_token,
|
118
|
-
)
|
119
|
-
# The last token does not have a next token, so we truncate it out.
|
120
|
-
x = {
|
121
|
-
"token_ids": token_ids[..., :-1],
|
122
|
-
"padding_mask": padding_mask[..., :-1],
|
123
|
-
}
|
124
|
-
# Target `y` will be the next token.
|
125
|
-
y, sample_weight = token_ids[..., 1:], padding_mask[..., 1:]
|
126
|
-
return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
|
127
|
-
|
128
|
-
def generate_preprocess(
|
129
|
-
self,
|
130
|
-
x,
|
131
|
-
sequence_length=None,
|
132
|
-
):
|
133
|
-
"""Convert strings to integer token input for generation.
|
134
|
-
|
135
|
-
Similar to calling the layer for training, this method takes in strings
|
136
|
-
or tensor strings, tokenizes and packs the input, and computes a padding
|
137
|
-
mask masking all inputs not filled in with a padded value.
|
138
|
-
|
139
|
-
Unlike calling the layer for training, this method does not compute
|
140
|
-
labels and will never append a `tokenizer.end_token_id` to the end of
|
141
|
-
the sequence (as generation is expected to continue at the end of the
|
142
|
-
inputted prompt).
|
143
|
-
"""
|
144
|
-
if not self.built:
|
145
|
-
self.build(None)
|
146
|
-
|
147
|
-
x = convert_inputs_to_list_of_tensor_segments(x)[0]
|
148
|
-
x = self.tokenizer(x)
|
149
|
-
token_ids, padding_mask = self.packer(
|
150
|
-
x, sequence_length=sequence_length, add_end_value=False
|
151
|
-
)
|
152
|
-
return {
|
153
|
-
"token_ids": token_ids,
|
154
|
-
"padding_mask": padding_mask,
|
155
|
-
}
|
156
|
-
|
157
|
-
def generate_postprocess(
|
158
|
-
self,
|
159
|
-
x,
|
160
|
-
):
|
161
|
-
"""Convert integer token output to strings for generation.
|
162
|
-
|
163
|
-
This method reverses `generate_preprocess()`, by first removing all
|
164
|
-
padding and start/end tokens, and then converting the integer sequence
|
165
|
-
back to a string.
|
166
|
-
"""
|
167
|
-
token_ids, padding_mask = x["token_ids"], x["padding_mask"]
|
168
|
-
ids_to_strip = (
|
169
|
-
self.tokenizer.start_token_id,
|
170
|
-
self.tokenizer.end_token_id,
|
171
|
-
)
|
172
|
-
token_ids = strip_to_ragged(token_ids, padding_mask, ids_to_strip)
|
173
|
-
return self.tokenizer.detokenize(token_ids)
|
89
|
+
backbone_cls = Phi3Backbone
|
90
|
+
tokenizer_cls = Phi3Tokenizer
|
@@ -11,17 +11,19 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
import copy
|
15
|
-
|
16
14
|
from keras_hub.src.api_export import keras_hub_export
|
17
|
-
from keras_hub.src.models.phi3.
|
15
|
+
from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
|
18
16
|
from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
|
19
17
|
SentencePieceTokenizer,
|
20
18
|
)
|
21
|
-
from keras_hub.src.utils.python_utils import classproperty
|
22
19
|
|
23
20
|
|
24
|
-
@keras_hub_export(
|
21
|
+
@keras_hub_export(
|
22
|
+
[
|
23
|
+
"keras_hub.tokenizers.Phi3Tokenizer",
|
24
|
+
"keras_hub.models.Phi3Tokenizer",
|
25
|
+
]
|
26
|
+
)
|
25
27
|
class Phi3Tokenizer(SentencePieceTokenizer):
|
26
28
|
"""Phi3 tokenizer layer based on SentencePiece.
|
27
29
|
|
@@ -31,10 +33,6 @@ class Phi3Tokenizer(SentencePieceTokenizer):
|
|
31
33
|
Phi3 models and provides a `from_preset()` method to automatically
|
32
34
|
download a matching vocabulary for a Phi3 preset.
|
33
35
|
|
34
|
-
This tokenizer does not provide truncation or padding of inputs. It can be
|
35
|
-
combined with a `keras_hub.models.Phi3Preprocessor` layer for input
|
36
|
-
packing.
|
37
|
-
|
38
36
|
If input is a batch of strings (rank > 0), the layer will output a
|
39
37
|
`tf.RaggedTensor` where the last dimension of the output is ragged.
|
40
38
|
|
@@ -63,32 +61,10 @@ class Phi3Tokenizer(SentencePieceTokenizer):
|
|
63
61
|
```
|
64
62
|
"""
|
65
63
|
|
64
|
+
backbone_cls = Phi3Backbone
|
65
|
+
|
66
66
|
def __init__(self, proto, **kwargs):
|
67
|
-
self.
|
68
|
-
self.
|
67
|
+
self._add_special_token("<s>", "start_token")
|
68
|
+
self._add_special_token("<|endoftext|>", "end_token")
|
69
|
+
self.pad_token_id = 0
|
69
70
|
super().__init__(proto=proto, **kwargs)
|
70
|
-
|
71
|
-
def set_proto(self, proto):
|
72
|
-
super().set_proto(proto)
|
73
|
-
if proto is not None:
|
74
|
-
for token in [self.start_token, self.end_token]:
|
75
|
-
if token not in self.get_vocabulary():
|
76
|
-
raise ValueError(
|
77
|
-
f"Cannot find token `'{token}'` in the provided "
|
78
|
-
f"`vocabulary`. Please provide `'{token}'` in your "
|
79
|
-
"`vocabulary` or use a pretrained `vocabulary` name."
|
80
|
-
)
|
81
|
-
self.start_token_id = self.token_to_id(self.start_token)
|
82
|
-
self.end_token_id = self.token_to_id(self.end_token)
|
83
|
-
# TODO: `pad_token` is `<|endoftext|>`, but setting it to `<unk>`
|
84
|
-
# for now, because of the way sampler works. sampler will think that
|
85
|
-
# `pad_token` is `end_token` and stop generation immediatly.
|
86
|
-
self.pad_token_id = 0
|
87
|
-
else:
|
88
|
-
self.start_token_id = None
|
89
|
-
self.end_token_id = None
|
90
|
-
self.pad_token_id = None
|
91
|
-
|
92
|
-
@classproperty
|
93
|
-
def presets(cls):
|
94
|
-
return copy.deepcopy(backbone_presets)
|
@@ -19,13 +19,9 @@ from keras_hub.src.layers.preprocessing.preprocessing_layer import (
|
|
19
19
|
PreprocessingLayer,
|
20
20
|
)
|
21
21
|
from keras_hub.src.utils.preset_utils import PREPROCESSOR_CONFIG_FILE
|
22
|
-
from keras_hub.src.utils.preset_utils import
|
23
|
-
from keras_hub.src.utils.preset_utils import
|
24
|
-
from keras_hub.src.utils.preset_utils import
|
25
|
-
from keras_hub.src.utils.preset_utils import check_format
|
26
|
-
from keras_hub.src.utils.preset_utils import list_presets
|
27
|
-
from keras_hub.src.utils.preset_utils import list_subclasses
|
28
|
-
from keras_hub.src.utils.preset_utils import load_serialized_object
|
22
|
+
from keras_hub.src.utils.preset_utils import builtin_presets
|
23
|
+
from keras_hub.src.utils.preset_utils import find_subclass
|
24
|
+
from keras_hub.src.utils.preset_utils import get_preset_loader
|
29
25
|
from keras_hub.src.utils.preset_utils import save_serialized_object
|
30
26
|
from keras_hub.src.utils.python_utils import classproperty
|
31
27
|
|
@@ -45,15 +41,20 @@ class Preprocessor(PreprocessingLayer):
|
|
45
41
|
should set the `tokenizer` property on construction.
|
46
42
|
"""
|
47
43
|
|
44
|
+
backbone_cls = None
|
48
45
|
tokenizer_cls = None
|
46
|
+
audio_converter_cls = None
|
47
|
+
image_converter_cls = None
|
49
48
|
|
50
49
|
def __init__(self, *args, **kwargs):
|
51
50
|
super().__init__(*args, **kwargs)
|
52
51
|
self._tokenizer = None
|
52
|
+
self._image_converter = None
|
53
|
+
self._audio_converter = None
|
53
54
|
|
54
55
|
def __setattr__(self, name, value):
|
55
56
|
# Work around torch setattr for properties.
|
56
|
-
if name in ["tokenizer"]:
|
57
|
+
if name in ["tokenizer", "audio_converter", "image_converter"]:
|
57
58
|
return object.__setattr__(self, name, value)
|
58
59
|
return super().__setattr__(name, value)
|
59
60
|
|
@@ -66,26 +67,60 @@ class Preprocessor(PreprocessingLayer):
|
|
66
67
|
def tokenizer(self, value):
|
67
68
|
self._tokenizer = value
|
68
69
|
|
70
|
+
@property
|
71
|
+
def audio_converter(self):
|
72
|
+
"""The audio converter used to preprocess audio data."""
|
73
|
+
return self._audio_converter
|
74
|
+
|
75
|
+
@audio_converter.setter
|
76
|
+
def audio_converter(self, value):
|
77
|
+
self._audio_converter = value
|
78
|
+
|
79
|
+
@property
|
80
|
+
def image_converter(self):
|
81
|
+
"""The image converter used to preprocess image data."""
|
82
|
+
return self._image_converter
|
83
|
+
|
84
|
+
@image_converter.setter
|
85
|
+
def image_converter(self, value):
|
86
|
+
self._image_converter = value
|
87
|
+
|
69
88
|
def get_config(self):
|
70
89
|
config = super().get_config()
|
71
|
-
|
90
|
+
if self.tokenizer:
|
91
|
+
config["tokenizer"] = keras.layers.serialize(self.tokenizer)
|
92
|
+
if self.audio_converter:
|
93
|
+
config["audio_converter"] = keras.layers.serialize(
|
94
|
+
self.audio_converter
|
95
|
+
)
|
96
|
+
if self.image_converter:
|
97
|
+
config["image_converter"] = keras.layers.serialize(
|
98
|
+
self.image_converter
|
99
|
+
)
|
72
100
|
return config
|
73
101
|
|
74
102
|
@classmethod
|
75
103
|
def from_config(cls, config):
|
76
104
|
if "tokenizer" in config and isinstance(config["tokenizer"], dict):
|
77
105
|
config["tokenizer"] = keras.layers.deserialize(config["tokenizer"])
|
106
|
+
if "audio_converter" in config and isinstance(
|
107
|
+
config["audio_converter"], dict
|
108
|
+
):
|
109
|
+
config["audio_converter"] = keras.layers.deserialize(
|
110
|
+
config["audio_converter"]
|
111
|
+
)
|
112
|
+
if "image_converter" in config and isinstance(
|
113
|
+
config["image_converter"], dict
|
114
|
+
):
|
115
|
+
config["image_converter"] = keras.layers.deserialize(
|
116
|
+
config["image_converter"]
|
117
|
+
)
|
78
118
|
return cls(**config)
|
79
119
|
|
80
120
|
@classproperty
|
81
121
|
def presets(cls):
|
82
|
-
presets
|
83
|
-
|
84
|
-
if cls.tokenizer_cls is not None:
|
85
|
-
presets.update(cls.tokenizer_cls.presets)
|
86
|
-
for subclass in list_subclasses(cls):
|
87
|
-
presets.update(subclass.presets)
|
88
|
-
return presets
|
122
|
+
"""List built-in presets for a `Preprocessor` subclass."""
|
123
|
+
return builtin_presets(cls)
|
89
124
|
|
90
125
|
@classmethod
|
91
126
|
def from_preset(
|
@@ -96,10 +131,10 @@ class Preprocessor(PreprocessingLayer):
|
|
96
131
|
"""Instantiate a `keras_hub.models.Preprocessor` from a model preset.
|
97
132
|
|
98
133
|
A preset is a directory of configs, weights and other file assets used
|
99
|
-
to save and load a pre-trained model. The `preset` can be passed as
|
134
|
+
to save and load a pre-trained model. The `preset` can be passed as
|
100
135
|
one of:
|
101
136
|
|
102
|
-
1. a built
|
137
|
+
1. a built-in preset identifier like `'bert_base_en'`
|
103
138
|
2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
|
104
139
|
3. a Hugging Face handle like `'hf://user/bert_base_en'`
|
105
140
|
4. a path to a local preset directory like `'./bert_base_en'`
|
@@ -109,10 +144,10 @@ class Preprocessor(PreprocessingLayer):
|
|
109
144
|
|
110
145
|
As there are usually multiple preprocessing classes for a given model,
|
111
146
|
this method should be called on a specific subclass like
|
112
|
-
`keras_hub.models.
|
147
|
+
`keras_hub.models.BertTextClassifierPreprocessor.from_preset()`.
|
113
148
|
|
114
149
|
Args:
|
115
|
-
preset: string. A built
|
150
|
+
preset: string. A built-in preset identifier, a Kaggle Models
|
116
151
|
handle, a Hugging Face handle, or a path to a local directory.
|
117
152
|
|
118
153
|
Examples:
|
@@ -123,75 +158,24 @@ class Preprocessor(PreprocessingLayer):
|
|
123
158
|
)
|
124
159
|
|
125
160
|
# Load a preprocessor for Bert classification.
|
126
|
-
preprocessor = keras_hub.models.
|
161
|
+
preprocessor = keras_hub.models.BertTextClassifierPreprocessor.from_preset(
|
127
162
|
"bert_base_en",
|
128
163
|
)
|
129
164
|
```
|
130
165
|
"""
|
131
|
-
format = check_format(preset)
|
132
|
-
|
133
|
-
if format == "transformers":
|
134
|
-
if cls.tokenizer_cls is None:
|
135
|
-
raise ValueError("Tokenizer class is None")
|
136
|
-
tokenizer = cls.tokenizer_cls.from_preset(preset)
|
137
|
-
return cls(tokenizer=tokenizer, **kwargs)
|
138
|
-
|
139
166
|
if cls == Preprocessor:
|
140
167
|
raise ValueError(
|
141
|
-
"Do not call `Preprocessor.from_preset()` directly. Instead
|
142
|
-
"choose a particular task class, e.g. "
|
143
|
-
"`keras_hub.models.
|
144
|
-
)
|
145
|
-
# Check if we should load a `preprocessor.json` directly.
|
146
|
-
load_preprocessor_config = False
|
147
|
-
if check_file_exists(preset, PREPROCESSOR_CONFIG_FILE):
|
148
|
-
preprocessor_preset_cls = check_config_class(
|
149
|
-
preset, PREPROCESSOR_CONFIG_FILE
|
168
|
+
"Do not call `Preprocessor.from_preset()` directly. Instead "
|
169
|
+
"choose a particular task preprocessing class, e.g. "
|
170
|
+
"`keras_hub.models.TextClassifierPreprocessor.from_preset()`."
|
150
171
|
)
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
)
|
159
|
-
preprocessor.tokenizer.load_preset_assets(preset)
|
160
|
-
return preprocessor
|
161
|
-
|
162
|
-
# Tokenizer case.
|
163
|
-
# If `preprocessor.json` doesn't exist or preprocessor preset class is
|
164
|
-
# different from the calling class, create the preprocessor based on
|
165
|
-
# `tokenizer.json`.
|
166
|
-
tokenizer_preset_cls = check_config_class(
|
167
|
-
preset, config_file=TOKENIZER_CONFIG_FILE
|
168
|
-
)
|
169
|
-
if tokenizer_preset_cls is not cls.tokenizer_cls:
|
170
|
-
subclasses = list_subclasses(cls)
|
171
|
-
subclasses = tuple(
|
172
|
-
filter(
|
173
|
-
lambda x: x.tokenizer_cls == tokenizer_preset_cls,
|
174
|
-
subclasses,
|
175
|
-
)
|
176
|
-
)
|
177
|
-
if len(subclasses) == 0:
|
178
|
-
raise ValueError(
|
179
|
-
f"No registered subclass of `{cls.__name__}` can load "
|
180
|
-
f"a `{tokenizer_preset_cls.__name__}`."
|
181
|
-
)
|
182
|
-
if len(subclasses) > 1:
|
183
|
-
names = ", ".join(f"`{x.__name__}`" for x in subclasses)
|
184
|
-
raise ValueError(
|
185
|
-
f"Ambiguous call to `{cls.__name__}.from_preset()`. "
|
186
|
-
f"Found multiple possible subclasses {names}. "
|
187
|
-
"Please call `from_preset` on a subclass directly."
|
188
|
-
)
|
189
|
-
|
190
|
-
tokenizer = load_serialized_object(preset, TOKENIZER_CONFIG_FILE)
|
191
|
-
tokenizer.load_preset_assets(preset)
|
192
|
-
preprocessor = cls(tokenizer=tokenizer, **kwargs)
|
193
|
-
|
194
|
-
return preprocessor
|
172
|
+
|
173
|
+
loader = get_preset_loader(preset)
|
174
|
+
backbone_cls = loader.check_backbone_class()
|
175
|
+
# Detect the correct subclass if we need to.
|
176
|
+
if cls.backbone_cls != backbone_cls:
|
177
|
+
cls = find_subclass(preset, cls, backbone_cls)
|
178
|
+
return loader.load_preprocessor(cls, **kwargs)
|
195
179
|
|
196
180
|
def save_to_preset(self, preset_dir):
|
197
181
|
"""Save preprocessor to a preset directory.
|
@@ -204,4 +188,9 @@ class Preprocessor(PreprocessingLayer):
|
|
204
188
|
preset_dir,
|
205
189
|
config_file=PREPROCESSOR_CONFIG_FILE,
|
206
190
|
)
|
207
|
-
self.tokenizer
|
191
|
+
if self.tokenizer:
|
192
|
+
self.tokenizer.save_to_preset(preset_dir)
|
193
|
+
if self.audio_converter:
|
194
|
+
self.audio_converter.save_to_preset(preset_dir)
|
195
|
+
if self.image_converter:
|
196
|
+
self.image_converter.save_to_preset(preset_dir)
|
@@ -11,3 +11,9 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
+
|
15
|
+
from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone
|
16
|
+
from keras_hub.src.models.resnet.resnet_presets import backbone_presets
|
17
|
+
from keras_hub.src.utils.preset_utils import register_presets
|
18
|
+
|
19
|
+
register_presets(backbone_presets, ResNetBackbone)
|