keras-hub-nightly 0.15.0.dev20240823171555__py3-none-any.whl → 0.15.0.dev20240911134614__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/__init__.py +1 -0
- keras_hub/api/bounding_box/__init__.py +36 -0
- keras_hub/api/layers/__init__.py +14 -0
- keras_hub/api/models/__init__.py +75 -31
- keras_hub/api/tokenizers/__init__.py +30 -0
- keras_hub/src/bounding_box/__init__.py +13 -0
- keras_hub/src/bounding_box/converters.py +529 -0
- keras_hub/src/bounding_box/formats.py +162 -0
- keras_hub/src/bounding_box/iou.py +263 -0
- keras_hub/src/bounding_box/to_dense.py +95 -0
- keras_hub/src/bounding_box/to_ragged.py +99 -0
- keras_hub/src/bounding_box/utils.py +194 -0
- keras_hub/src/bounding_box/validate_format.py +99 -0
- keras_hub/src/layers/preprocessing/audio_converter.py +121 -0
- keras_hub/src/layers/preprocessing/image_converter.py +130 -0
- keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py +2 -0
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +9 -8
- keras_hub/src/layers/preprocessing/preprocessing_layer.py +2 -29
- keras_hub/src/layers/preprocessing/random_deletion.py +33 -31
- keras_hub/src/layers/preprocessing/random_swap.py +33 -31
- keras_hub/src/layers/preprocessing/resizing_image_converter.py +101 -0
- keras_hub/src/layers/preprocessing/start_end_packer.py +3 -2
- keras_hub/src/models/albert/__init__.py +1 -2
- keras_hub/src/models/albert/albert_masked_lm_preprocessor.py +6 -86
- keras_hub/src/models/albert/{albert_classifier.py → albert_text_classifier.py} +29 -10
- keras_hub/src/models/albert/{albert_preprocessor.py → albert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/albert/albert_tokenizer.py +17 -36
- keras_hub/src/models/backbone.py +12 -34
- keras_hub/src/models/bart/__init__.py +1 -2
- keras_hub/src/models/bart/bart_preprocessor.py +6 -18
- keras_hub/src/models/bart/bart_seq_2_seq_lm_preprocessor.py +21 -148
- keras_hub/src/models/bart/bart_tokenizer.py +12 -39
- keras_hub/src/models/bert/__init__.py +1 -5
- keras_hub/src/models/bert/bert_masked_lm_preprocessor.py +6 -87
- keras_hub/src/models/bert/bert_presets.py +1 -4
- keras_hub/src/models/bert/{bert_classifier.py → bert_text_classifier.py} +12 -10
- keras_hub/src/models/bert/{bert_preprocessor.py → bert_text_classifier_preprocessor.py} +14 -70
- keras_hub/src/models/bert/bert_tokenizer.py +17 -35
- keras_hub/src/models/bloom/__init__.py +1 -2
- keras_hub/src/models/bloom/bloom_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/bloom/bloom_preprocessor.py +5 -12
- keras_hub/src/models/bloom/bloom_tokenizer.py +12 -41
- keras_hub/src/models/causal_lm.py +10 -29
- keras_hub/src/models/causal_lm_preprocessor.py +195 -0
- keras_hub/src/models/csp_darknet/csp_darknet_backbone.py +54 -15
- keras_hub/src/models/deberta_v3/__init__.py +1 -4
- keras_hub/src/models/deberta_v3/deberta_v3_masked_lm_preprocessor.py +14 -77
- keras_hub/src/models/deberta_v3/{deberta_v3_classifier.py → deberta_v3_text_classifier.py} +11 -11
- keras_hub/src/models/deberta_v3/{deberta_v3_preprocessor.py → deberta_v3_text_classifier_preprocessor.py} +23 -64
- keras_hub/src/models/deberta_v3/deberta_v3_tokenizer.py +30 -25
- keras_hub/src/models/densenet/densenet_backbone.py +46 -22
- keras_hub/src/models/distil_bert/__init__.py +1 -4
- keras_hub/src/models/distil_bert/distil_bert_masked_lm_preprocessor.py +14 -76
- keras_hub/src/models/distil_bert/{distil_bert_classifier.py → distil_bert_text_classifier.py} +12 -12
- keras_hub/src/models/distil_bert/{distil_bert_preprocessor.py → distil_bert_text_classifier_preprocessor.py} +23 -63
- keras_hub/src/models/distil_bert/distil_bert_tokenizer.py +19 -35
- keras_hub/src/models/efficientnet/__init__.py +13 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +569 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +229 -0
- keras_hub/src/models/efficientnet/mbconv.py +238 -0
- keras_hub/src/models/electra/__init__.py +1 -2
- keras_hub/src/models/electra/electra_preprocessor.py +6 -5
- keras_hub/src/models/electra/electra_tokenizer.py +17 -32
- keras_hub/src/models/f_net/__init__.py +1 -2
- keras_hub/src/models/f_net/f_net_masked_lm_preprocessor.py +12 -78
- keras_hub/src/models/f_net/{f_net_classifier.py → f_net_text_classifier.py} +10 -8
- keras_hub/src/models/f_net/{f_net_preprocessor.py → f_net_text_classifier_preprocessor.py} +19 -63
- keras_hub/src/models/f_net/f_net_tokenizer.py +17 -35
- keras_hub/src/models/falcon/__init__.py +1 -2
- keras_hub/src/models/falcon/falcon_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/falcon/falcon_preprocessor.py +5 -12
- keras_hub/src/models/falcon/falcon_tokenizer.py +12 -35
- keras_hub/src/models/gemma/__init__.py +1 -2
- keras_hub/src/models/gemma/gemma_causal_lm_preprocessor.py +6 -90
- keras_hub/src/models/gemma/gemma_preprocessor.py +5 -12
- keras_hub/src/models/gemma/gemma_tokenizer.py +12 -23
- keras_hub/src/models/gpt2/__init__.py +1 -2
- keras_hub/src/models/gpt2/gpt2_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/gpt2/gpt2_preprocessor.py +5 -12
- keras_hub/src/models/gpt2/gpt2_tokenizer.py +12 -34
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_preprocessor.py +5 -12
- keras_hub/src/models/gpt_neo_x/gpt_neo_x_tokenizer.py +12 -34
- keras_hub/src/models/image_classifier.py +0 -5
- keras_hub/src/models/image_classifier_preprocessor.py +83 -0
- keras_hub/src/models/llama/__init__.py +1 -2
- keras_hub/src/models/llama/llama_causal_lm_preprocessor.py +6 -85
- keras_hub/src/models/llama/llama_preprocessor.py +5 -12
- keras_hub/src/models/llama/llama_tokenizer.py +12 -25
- keras_hub/src/models/llama3/__init__.py +1 -2
- keras_hub/src/models/llama3/llama3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/llama3/llama3_preprocessor.py +2 -0
- keras_hub/src/models/llama3/llama3_tokenizer.py +12 -33
- keras_hub/src/models/masked_lm.py +0 -2
- keras_hub/src/models/masked_lm_preprocessor.py +156 -0
- keras_hub/src/models/mistral/__init__.py +1 -2
- keras_hub/src/models/mistral/mistral_causal_lm_preprocessor.py +6 -91
- keras_hub/src/models/mistral/mistral_preprocessor.py +5 -12
- keras_hub/src/models/mistral/mistral_tokenizer.py +12 -23
- keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +2 -2
- keras_hub/src/models/mobilenet/__init__.py +13 -0
- keras_hub/src/models/mobilenet/mobilenet_backbone.py +530 -0
- keras_hub/src/models/mobilenet/mobilenet_image_classifier.py +114 -0
- keras_hub/src/models/opt/__init__.py +1 -2
- keras_hub/src/models/opt/opt_causal_lm_preprocessor.py +6 -93
- keras_hub/src/models/opt/opt_preprocessor.py +5 -12
- keras_hub/src/models/opt/opt_tokenizer.py +12 -41
- keras_hub/src/models/pali_gemma/__init__.py +1 -4
- keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py +28 -28
- keras_hub/src/models/pali_gemma/pali_gemma_image_converter.py +25 -0
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +5 -5
- keras_hub/src/models/pali_gemma/pali_gemma_tokenizer.py +10 -2
- keras_hub/src/models/phi3/__init__.py +1 -2
- keras_hub/src/models/phi3/phi3_causal_lm.py +3 -9
- keras_hub/src/models/phi3/phi3_causal_lm_preprocessor.py +6 -89
- keras_hub/src/models/phi3/phi3_preprocessor.py +5 -12
- keras_hub/src/models/phi3/phi3_tokenizer.py +12 -36
- keras_hub/src/models/preprocessor.py +76 -83
- keras_hub/src/models/resnet/__init__.py +6 -0
- keras_hub/src/models/resnet/resnet_backbone.py +387 -26
- keras_hub/src/models/resnet/resnet_image_classifier.py +7 -3
- keras_hub/src/models/resnet/resnet_image_classifier_preprocessor.py +28 -0
- keras_hub/src/models/resnet/resnet_image_converter.py +23 -0
- keras_hub/src/models/resnet/resnet_presets.py +95 -0
- keras_hub/src/models/roberta/__init__.py +1 -2
- keras_hub/src/models/roberta/roberta_masked_lm_preprocessor.py +22 -74
- keras_hub/src/models/roberta/{roberta_classifier.py → roberta_text_classifier.py} +11 -11
- keras_hub/src/models/roberta/{roberta_preprocessor.py → roberta_text_classifier_preprocessor.py} +21 -53
- keras_hub/src/models/roberta/roberta_tokenizer.py +13 -52
- keras_hub/src/models/seq_2_seq_lm_preprocessor.py +269 -0
- keras_hub/src/models/stable_diffusion_v3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py +103 -0
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +93 -0
- keras_hub/src/models/stable_diffusion_v3/clip_text_encoder.py +149 -0
- keras_hub/src/models/stable_diffusion_v3/clip_tokenizer.py +167 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit.py +427 -0
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +317 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_preprocessor.py +74 -0
- keras_hub/src/models/stable_diffusion_v3/t5_xxl_text_encoder.py +155 -0
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +126 -0
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +186 -0
- keras_hub/src/models/t5/__init__.py +1 -2
- keras_hub/src/models/t5/t5_tokenizer.py +13 -23
- keras_hub/src/models/task.py +71 -116
- keras_hub/src/models/{classifier.py → text_classifier.py} +8 -13
- keras_hub/src/models/text_classifier_preprocessor.py +138 -0
- keras_hub/src/models/whisper/__init__.py +1 -2
- keras_hub/src/models/whisper/{whisper_audio_feature_extractor.py → whisper_audio_converter.py} +20 -18
- keras_hub/src/models/whisper/whisper_backbone.py +0 -3
- keras_hub/src/models/whisper/whisper_presets.py +10 -10
- keras_hub/src/models/whisper/whisper_tokenizer.py +20 -16
- keras_hub/src/models/xlm_roberta/__init__.py +1 -4
- keras_hub/src/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor.py +26 -72
- keras_hub/src/models/xlm_roberta/{xlm_roberta_classifier.py → xlm_roberta_text_classifier.py} +11 -11
- keras_hub/src/models/xlm_roberta/{xlm_roberta_preprocessor.py → xlm_roberta_text_classifier_preprocessor.py} +26 -53
- keras_hub/src/models/xlm_roberta/xlm_roberta_tokenizer.py +25 -10
- keras_hub/src/tests/test_case.py +25 -0
- keras_hub/src/tokenizers/byte_pair_tokenizer.py +29 -17
- keras_hub/src/tokenizers/byte_tokenizer.py +14 -15
- keras_hub/src/tokenizers/sentence_piece_tokenizer.py +19 -7
- keras_hub/src/tokenizers/tokenizer.py +67 -32
- keras_hub/src/tokenizers/unicode_codepoint_tokenizer.py +14 -15
- keras_hub/src/tokenizers/word_piece_tokenizer.py +33 -47
- keras_hub/src/utils/keras_utils.py +0 -50
- keras_hub/src/utils/preset_utils.py +238 -67
- keras_hub/src/utils/tensor_utils.py +187 -69
- keras_hub/src/utils/timm/convert_resnet.py +20 -16
- keras_hub/src/utils/timm/preset_loader.py +67 -0
- keras_hub/src/utils/transformers/convert_albert.py +193 -0
- keras_hub/src/utils/transformers/convert_bart.py +373 -0
- keras_hub/src/utils/transformers/convert_bert.py +7 -17
- keras_hub/src/utils/transformers/convert_distilbert.py +10 -20
- keras_hub/src/utils/transformers/convert_gemma.py +5 -19
- keras_hub/src/utils/transformers/convert_gpt2.py +5 -18
- keras_hub/src/utils/transformers/convert_llama3.py +7 -18
- keras_hub/src/utils/transformers/convert_mistral.py +129 -0
- keras_hub/src/utils/transformers/convert_pali_gemma.py +7 -29
- keras_hub/src/utils/transformers/preset_loader.py +77 -0
- keras_hub/src/utils/transformers/safetensor_utils.py +2 -2
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/METADATA +1 -2
- keras_hub_nightly-0.15.0.dev20240911134614.dist-info/RECORD +338 -0
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/WHEEL +1 -1
- keras_hub/src/models/whisper/whisper_preprocessor.py +0 -326
- keras_hub/src/utils/timm/convert.py +0 -37
- keras_hub/src/utils/transformers/convert.py +0 -101
- keras_hub_nightly-0.15.0.dev20240823171555.dist-info/RECORD +0 -297
- {keras_hub_nightly-0.15.0.dev20240823171555.dist-info → keras_hub_nightly-0.15.0.dev20240911134614.dist-info}/top_level.txt +0 -0
@@ -11,17 +11,19 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
-
import copy
|
15
|
-
|
16
14
|
from keras_hub.src.api_export import keras_hub_export
|
17
|
-
from keras_hub.src.models.phi3.
|
15
|
+
from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
|
18
16
|
from keras_hub.src.tokenizers.sentence_piece_tokenizer import (
|
19
17
|
SentencePieceTokenizer,
|
20
18
|
)
|
21
|
-
from keras_hub.src.utils.python_utils import classproperty
|
22
19
|
|
23
20
|
|
24
|
-
@keras_hub_export(
|
21
|
+
@keras_hub_export(
|
22
|
+
[
|
23
|
+
"keras_hub.tokenizers.Phi3Tokenizer",
|
24
|
+
"keras_hub.models.Phi3Tokenizer",
|
25
|
+
]
|
26
|
+
)
|
25
27
|
class Phi3Tokenizer(SentencePieceTokenizer):
|
26
28
|
"""Phi3 tokenizer layer based on SentencePiece.
|
27
29
|
|
@@ -31,10 +33,6 @@ class Phi3Tokenizer(SentencePieceTokenizer):
|
|
31
33
|
Phi3 models and provides a `from_preset()` method to automatically
|
32
34
|
download a matching vocabulary for a Phi3 preset.
|
33
35
|
|
34
|
-
This tokenizer does not provide truncation or padding of inputs. It can be
|
35
|
-
combined with a `keras_hub.models.Phi3Preprocessor` layer for input
|
36
|
-
packing.
|
37
|
-
|
38
36
|
If input is a batch of strings (rank > 0), the layer will output a
|
39
37
|
`tf.RaggedTensor` where the last dimension of the output is ragged.
|
40
38
|
|
@@ -63,32 +61,10 @@ class Phi3Tokenizer(SentencePieceTokenizer):
|
|
63
61
|
```
|
64
62
|
"""
|
65
63
|
|
64
|
+
backbone_cls = Phi3Backbone
|
65
|
+
|
66
66
|
def __init__(self, proto, **kwargs):
|
67
|
-
self.
|
68
|
-
self.
|
67
|
+
self._add_special_token("<s>", "start_token")
|
68
|
+
self._add_special_token("<|endoftext|>", "end_token")
|
69
|
+
self.pad_token_id = 0
|
69
70
|
super().__init__(proto=proto, **kwargs)
|
70
|
-
|
71
|
-
def set_proto(self, proto):
|
72
|
-
super().set_proto(proto)
|
73
|
-
if proto is not None:
|
74
|
-
for token in [self.start_token, self.end_token]:
|
75
|
-
if token not in self.get_vocabulary():
|
76
|
-
raise ValueError(
|
77
|
-
f"Cannot find token `'{token}'` in the provided "
|
78
|
-
f"`vocabulary`. Please provide `'{token}'` in your "
|
79
|
-
"`vocabulary` or use a pretrained `vocabulary` name."
|
80
|
-
)
|
81
|
-
self.start_token_id = self.token_to_id(self.start_token)
|
82
|
-
self.end_token_id = self.token_to_id(self.end_token)
|
83
|
-
# TODO: `pad_token` is `<|endoftext|>`, but setting it to `<unk>`
|
84
|
-
# for now, because of the way sampler works. sampler will think that
|
85
|
-
# `pad_token` is `end_token` and stop generation immediatly.
|
86
|
-
self.pad_token_id = 0
|
87
|
-
else:
|
88
|
-
self.start_token_id = None
|
89
|
-
self.end_token_id = None
|
90
|
-
self.pad_token_id = None
|
91
|
-
|
92
|
-
@classproperty
|
93
|
-
def presets(cls):
|
94
|
-
return copy.deepcopy(backbone_presets)
|
@@ -19,13 +19,9 @@ from keras_hub.src.layers.preprocessing.preprocessing_layer import (
|
|
19
19
|
PreprocessingLayer,
|
20
20
|
)
|
21
21
|
from keras_hub.src.utils.preset_utils import PREPROCESSOR_CONFIG_FILE
|
22
|
-
from keras_hub.src.utils.preset_utils import
|
23
|
-
from keras_hub.src.utils.preset_utils import
|
24
|
-
from keras_hub.src.utils.preset_utils import
|
25
|
-
from keras_hub.src.utils.preset_utils import check_format
|
26
|
-
from keras_hub.src.utils.preset_utils import list_presets
|
27
|
-
from keras_hub.src.utils.preset_utils import list_subclasses
|
28
|
-
from keras_hub.src.utils.preset_utils import load_serialized_object
|
22
|
+
from keras_hub.src.utils.preset_utils import builtin_presets
|
23
|
+
from keras_hub.src.utils.preset_utils import find_subclass
|
24
|
+
from keras_hub.src.utils.preset_utils import get_preset_loader
|
29
25
|
from keras_hub.src.utils.preset_utils import save_serialized_object
|
30
26
|
from keras_hub.src.utils.python_utils import classproperty
|
31
27
|
|
@@ -45,15 +41,20 @@ class Preprocessor(PreprocessingLayer):
|
|
45
41
|
should set the `tokenizer` property on construction.
|
46
42
|
"""
|
47
43
|
|
44
|
+
backbone_cls = None
|
48
45
|
tokenizer_cls = None
|
46
|
+
audio_converter_cls = None
|
47
|
+
image_converter_cls = None
|
49
48
|
|
50
49
|
def __init__(self, *args, **kwargs):
|
51
50
|
super().__init__(*args, **kwargs)
|
52
51
|
self._tokenizer = None
|
52
|
+
self._image_converter = None
|
53
|
+
self._audio_converter = None
|
53
54
|
|
54
55
|
def __setattr__(self, name, value):
|
55
56
|
# Work around torch setattr for properties.
|
56
|
-
if name in ["tokenizer"]:
|
57
|
+
if name in ["tokenizer", "audio_converter", "image_converter"]:
|
57
58
|
return object.__setattr__(self, name, value)
|
58
59
|
return super().__setattr__(name, value)
|
59
60
|
|
@@ -66,40 +67,75 @@ class Preprocessor(PreprocessingLayer):
|
|
66
67
|
def tokenizer(self, value):
|
67
68
|
self._tokenizer = value
|
68
69
|
|
70
|
+
@property
|
71
|
+
def audio_converter(self):
|
72
|
+
"""The audio converter used to preprocess audio data."""
|
73
|
+
return self._audio_converter
|
74
|
+
|
75
|
+
@audio_converter.setter
|
76
|
+
def audio_converter(self, value):
|
77
|
+
self._audio_converter = value
|
78
|
+
|
79
|
+
@property
|
80
|
+
def image_converter(self):
|
81
|
+
"""The image converter used to preprocess image data."""
|
82
|
+
return self._image_converter
|
83
|
+
|
84
|
+
@image_converter.setter
|
85
|
+
def image_converter(self, value):
|
86
|
+
self._image_converter = value
|
87
|
+
|
69
88
|
def get_config(self):
|
70
89
|
config = super().get_config()
|
71
|
-
|
90
|
+
if self.tokenizer:
|
91
|
+
config["tokenizer"] = keras.layers.serialize(self.tokenizer)
|
92
|
+
if self.audio_converter:
|
93
|
+
config["audio_converter"] = keras.layers.serialize(
|
94
|
+
self.audio_converter
|
95
|
+
)
|
96
|
+
if self.image_converter:
|
97
|
+
config["image_converter"] = keras.layers.serialize(
|
98
|
+
self.image_converter
|
99
|
+
)
|
72
100
|
return config
|
73
101
|
|
74
102
|
@classmethod
|
75
103
|
def from_config(cls, config):
|
76
104
|
if "tokenizer" in config and isinstance(config["tokenizer"], dict):
|
77
105
|
config["tokenizer"] = keras.layers.deserialize(config["tokenizer"])
|
106
|
+
if "audio_converter" in config and isinstance(
|
107
|
+
config["audio_converter"], dict
|
108
|
+
):
|
109
|
+
config["audio_converter"] = keras.layers.deserialize(
|
110
|
+
config["audio_converter"]
|
111
|
+
)
|
112
|
+
if "image_converter" in config and isinstance(
|
113
|
+
config["image_converter"], dict
|
114
|
+
):
|
115
|
+
config["image_converter"] = keras.layers.deserialize(
|
116
|
+
config["image_converter"]
|
117
|
+
)
|
78
118
|
return cls(**config)
|
79
119
|
|
80
120
|
@classproperty
|
81
121
|
def presets(cls):
|
82
|
-
presets
|
83
|
-
|
84
|
-
if cls.tokenizer_cls is not None:
|
85
|
-
presets.update(cls.tokenizer_cls.presets)
|
86
|
-
for subclass in list_subclasses(cls):
|
87
|
-
presets.update(subclass.presets)
|
88
|
-
return presets
|
122
|
+
"""List built-in presets for a `Preprocessor` subclass."""
|
123
|
+
return builtin_presets(cls)
|
89
124
|
|
90
125
|
@classmethod
|
91
126
|
def from_preset(
|
92
127
|
cls,
|
93
128
|
preset,
|
129
|
+
load_task_extras=False,
|
94
130
|
**kwargs,
|
95
131
|
):
|
96
132
|
"""Instantiate a `keras_hub.models.Preprocessor` from a model preset.
|
97
133
|
|
98
134
|
A preset is a directory of configs, weights and other file assets used
|
99
|
-
to save and load a pre-trained model. The `preset` can be passed as
|
135
|
+
to save and load a pre-trained model. The `preset` can be passed as
|
100
136
|
one of:
|
101
137
|
|
102
|
-
1. a built
|
138
|
+
1. a built-in preset identifier like `'bert_base_en'`
|
103
139
|
2. a Kaggle Models handle like `'kaggle://user/bert/keras/bert_base_en'`
|
104
140
|
3. a Hugging Face handle like `'hf://user/bert_base_en'`
|
105
141
|
4. a path to a local preset directory like `'./bert_base_en'`
|
@@ -109,11 +145,14 @@ class Preprocessor(PreprocessingLayer):
|
|
109
145
|
|
110
146
|
As there are usually multiple preprocessing classes for a given model,
|
111
147
|
this method should be called on a specific subclass like
|
112
|
-
`keras_hub.models.
|
148
|
+
`keras_hub.models.BertTextClassifierPreprocessor.from_preset()`.
|
113
149
|
|
114
150
|
Args:
|
115
|
-
preset: string. A built
|
151
|
+
preset: string. A built-in preset identifier, a Kaggle Models
|
116
152
|
handle, a Hugging Face handle, or a path to a local directory.
|
153
|
+
load_task_extras: bool. If `True`, load the saved task preprocessing
|
154
|
+
configuration from a `preprocessing.json`. You might use this to
|
155
|
+
restore the sequence length a model was fine-tuned with.
|
117
156
|
|
118
157
|
Examples:
|
119
158
|
```python
|
@@ -123,75 +162,24 @@ class Preprocessor(PreprocessingLayer):
|
|
123
162
|
)
|
124
163
|
|
125
164
|
# Load a preprocessor for Bert classification.
|
126
|
-
preprocessor = keras_hub.models.
|
165
|
+
preprocessor = keras_hub.models.BertTextClassifierPreprocessor.from_preset(
|
127
166
|
"bert_base_en",
|
128
167
|
)
|
129
168
|
```
|
130
169
|
"""
|
131
|
-
format = check_format(preset)
|
132
|
-
|
133
|
-
if format == "transformers":
|
134
|
-
if cls.tokenizer_cls is None:
|
135
|
-
raise ValueError("Tokenizer class is None")
|
136
|
-
tokenizer = cls.tokenizer_cls.from_preset(preset)
|
137
|
-
return cls(tokenizer=tokenizer, **kwargs)
|
138
|
-
|
139
170
|
if cls == Preprocessor:
|
140
171
|
raise ValueError(
|
141
|
-
"Do not call `Preprocessor.from_preset()` directly. Instead
|
142
|
-
"choose a particular task class, e.g. "
|
143
|
-
"`keras_hub.models.
|
144
|
-
)
|
145
|
-
# Check if we should load a `preprocessor.json` directly.
|
146
|
-
load_preprocessor_config = False
|
147
|
-
if check_file_exists(preset, PREPROCESSOR_CONFIG_FILE):
|
148
|
-
preprocessor_preset_cls = check_config_class(
|
149
|
-
preset, PREPROCESSOR_CONFIG_FILE
|
172
|
+
"Do not call `Preprocessor.from_preset()` directly. Instead "
|
173
|
+
"choose a particular task preprocessing class, e.g. "
|
174
|
+
"`keras_hub.models.TextClassifierPreprocessor.from_preset()`."
|
150
175
|
)
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
)
|
159
|
-
preprocessor.tokenizer.load_preset_assets(preset)
|
160
|
-
return preprocessor
|
161
|
-
|
162
|
-
# Tokenizer case.
|
163
|
-
# If `preprocessor.json` doesn't exist or preprocessor preset class is
|
164
|
-
# different from the calling class, create the preprocessor based on
|
165
|
-
# `tokenizer.json`.
|
166
|
-
tokenizer_preset_cls = check_config_class(
|
167
|
-
preset, config_file=TOKENIZER_CONFIG_FILE
|
168
|
-
)
|
169
|
-
if tokenizer_preset_cls is not cls.tokenizer_cls:
|
170
|
-
subclasses = list_subclasses(cls)
|
171
|
-
subclasses = tuple(
|
172
|
-
filter(
|
173
|
-
lambda x: x.tokenizer_cls == tokenizer_preset_cls,
|
174
|
-
subclasses,
|
175
|
-
)
|
176
|
-
)
|
177
|
-
if len(subclasses) == 0:
|
178
|
-
raise ValueError(
|
179
|
-
f"No registered subclass of `{cls.__name__}` can load "
|
180
|
-
f"a `{tokenizer_preset_cls.__name__}`."
|
181
|
-
)
|
182
|
-
if len(subclasses) > 1:
|
183
|
-
names = ", ".join(f"`{x.__name__}`" for x in subclasses)
|
184
|
-
raise ValueError(
|
185
|
-
f"Ambiguous call to `{cls.__name__}.from_preset()`. "
|
186
|
-
f"Found multiple possible subclasses {names}. "
|
187
|
-
"Please call `from_preset` on a subclass directly."
|
188
|
-
)
|
189
|
-
|
190
|
-
tokenizer = load_serialized_object(preset, TOKENIZER_CONFIG_FILE)
|
191
|
-
tokenizer.load_preset_assets(preset)
|
192
|
-
preprocessor = cls(tokenizer=tokenizer, **kwargs)
|
193
|
-
|
194
|
-
return preprocessor
|
176
|
+
|
177
|
+
loader = get_preset_loader(preset)
|
178
|
+
backbone_cls = loader.check_backbone_class()
|
179
|
+
# Detect the correct subclass if we need to.
|
180
|
+
if cls.backbone_cls != backbone_cls:
|
181
|
+
cls = find_subclass(preset, cls, backbone_cls)
|
182
|
+
return loader.load_preprocessor(cls, load_task_extras, **kwargs)
|
195
183
|
|
196
184
|
def save_to_preset(self, preset_dir):
|
197
185
|
"""Save preprocessor to a preset directory.
|
@@ -204,4 +192,9 @@ class Preprocessor(PreprocessingLayer):
|
|
204
192
|
preset_dir,
|
205
193
|
config_file=PREPROCESSOR_CONFIG_FILE,
|
206
194
|
)
|
207
|
-
self.tokenizer
|
195
|
+
if self.tokenizer:
|
196
|
+
self.tokenizer.save_to_preset(preset_dir)
|
197
|
+
if self.audio_converter:
|
198
|
+
self.audio_converter.save_to_preset(preset_dir)
|
199
|
+
if self.image_converter:
|
200
|
+
self.image_converter.save_to_preset(preset_dir)
|
@@ -11,3 +11,9 @@
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
|
+
|
15
|
+
from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone
|
16
|
+
from keras_hub.src.models.resnet.resnet_presets import backbone_presets
|
17
|
+
from keras_hub.src.utils.preset_utils import register_presets
|
18
|
+
|
19
|
+
register_presets(backbone_presets, ResNetBackbone)
|