keras-hub-nightly 0.21.0.dev202505270408__py3-none-any.whl → 0.22.0.dev202505290412__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/src/layers/preprocessing/multi_segment_packer.py +17 -3
- keras_hub/src/layers/preprocessing/start_end_packer.py +24 -6
- keras_hub/src/models/backbone.py +13 -10
- keras_hub/src/models/gemma3/gemma3_backbone.py +2 -2
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +2 -2
- keras_hub/src/models/vit/vit_backbone.py +31 -11
- keras_hub/src/models/vit/vit_image_converter.py +0 -70
- keras_hub/src/models/vit/vit_layers.py +33 -18
- keras_hub/src/models/vit/vit_presets.py +11 -11
- keras_hub/src/utils/tensor_utils.py +14 -0
- keras_hub/src/version.py +1 -1
- {keras_hub_nightly-0.21.0.dev202505270408.dist-info → keras_hub_nightly-0.22.0.dev202505290412.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.21.0.dev202505270408.dist-info → keras_hub_nightly-0.22.0.dev202505290412.dist-info}/RECORD +15 -15
- {keras_hub_nightly-0.21.0.dev202505270408.dist-info → keras_hub_nightly-0.22.0.dev202505290412.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.21.0.dev202505270408.dist-info → keras_hub_nightly-0.22.0.dev202505290412.dist-info}/top_level.txt +0 -0
keras_hub/src/layers/preprocessing/multi_segment_packer.py
CHANGED
@@ -3,6 +3,7 @@ from keras_hub.src.layers.preprocessing.preprocessing_layer import (
     PreprocessingLayer,
 )
 from keras_hub.src.utils.tensor_utils import convert_to_ragged_batch
+from keras_hub.src.utils.tensor_utils import pad
 from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 try:
@@ -66,6 +67,8 @@ class MultiSegmentPacker(PreprocessingLayer):
                 "waterfall" algorithm that allocates quota in a
                 left-to-right manner and fills up the buckets until we run
                 out of budget. It support arbitrary number of segments.
+        padding_side: str. Whether to pad the input on the "left" or "right".
+            Defaults to "right".
 
     Returns:
         A tuple with two elements. The first is the dense, packed token
@@ -124,6 +127,7 @@ class MultiSegmentPacker(PreprocessingLayer):
         sep_value=None,
         pad_value=None,
         truncate="round_robin",
+        padding_side="right",
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -162,6 +166,7 @@ class MultiSegmentPacker(PreprocessingLayer):
         self.end_value = end_value
 
         self.pad_value = pad_value
+        self.padding_side = padding_side
 
     def get_config(self):
         config = super().get_config()
@@ -173,6 +178,7 @@ class MultiSegmentPacker(PreprocessingLayer):
                 "sep_value": self._sep_value,
                 "pad_value": self.pad_value,
                 "truncate": self.truncate,
+                "padding_side": self.padding_side,
             }
         )
         return config
@@ -287,10 +293,18 @@ class MultiSegmentPacker(PreprocessingLayer):
         # Pad to dense tensor output.
         sequence_length = sequence_length or self.sequence_length
         shape = tf.cast([-1, sequence_length], "int64")
-        token_ids =
-
+        token_ids = pad(
+            token_ids,
+            shape=shape,
+            padding_side=self.padding_side,
+            pad_value=self.pad_value,
+        )
+        segment_ids = pad(
+            segment_ids,
+            shape=shape,
+            padding_side=self.padding_side,
+            pad_value=0,
         )
-        segment_ids = segment_ids.to_tensor(shape=shape)
         # Remove the batch dim if added.
         if unbatched:
             token_ids = tf.squeeze(token_ids, 0)
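Note on the `MultiSegmentPacker` change above: `padding_side` is a new constructor argument that defaults to `"right"`, so existing call sites keep their current behavior. A minimal usage sketch of the left-padded case (the token IDs below are made up for illustration):

```python
import keras_hub

# Pack two segments into a length-8 sequence, padding on the left.
packer = keras_hub.layers.MultiSegmentPacker(
    sequence_length=8,
    start_value=101,  # e.g. a [CLS]-style token
    end_value=102,    # e.g. a [SEP]-style token
    padding_side="left",
)
token_ids, segment_ids = packer(([1, 2, 3], [4, 5]))
# With padding_side="left", pad values sit at the front of token_ids and
# segment_ids instead of the end; with the default "right" nothing changes.
```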
keras_hub/src/layers/preprocessing/start_end_packer.py
CHANGED
@@ -3,6 +3,7 @@ from keras_hub.src.layers.preprocessing.preprocessing_layer import (
     PreprocessingLayer,
 )
 from keras_hub.src.utils.tensor_utils import convert_to_ragged_batch
+from keras_hub.src.utils.tensor_utils import pad
 from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 try:
@@ -39,6 +40,8 @@ class StartEndPacker(PreprocessingLayer):
             0 or "" will be added depending on the dtype of the input tensor.
         return_padding_mask: bool. Whether to return a boolean padding mask of
             all locations that are filled in with the `pad_value`.
+        padding_side: str. Whether to pad the input on the "left" or "right".
+            Defaults to "right".
 
     Call arguments:
         inputs: A `tf.Tensor`, `tf.RaggedTensor`, or list of python strings.
@@ -111,6 +114,7 @@ class StartEndPacker(PreprocessingLayer):
         pad_value=None,
         return_padding_mask=False,
         name=None,
+        padding_side="right",
         **kwargs,
     ):
         super().__init__(name=name, **kwargs)
@@ -139,6 +143,7 @@ class StartEndPacker(PreprocessingLayer):
 
         self.pad_value = pad_value
         self.return_padding_mask = return_padding_mask
+        self.padding_side = padding_side
 
     @preprocessing_function
     def call(
@@ -154,6 +159,13 @@ class StartEndPacker(PreprocessingLayer):
         batch_size = tf.shape(x)[0]
         sequence_length = sequence_length or self.sequence_length
         dtype = inputs.dtype
+        # Truncate.
+        truncation_length = sequence_length
+        if add_start_value and self.start_value is not None:
+            truncation_length -= len(self.start_value)
+        if add_end_value and self.end_value is not None:
+            truncation_length -= len(self.end_value)
+        x = x[..., :truncation_length]
 
         # Concatenate start and end tokens.
         if add_start_value and self.start_value is not None:
@@ -167,23 +179,28 @@ class StartEndPacker(PreprocessingLayer):
             end_token_id_tensor = tf.repeat(
                 end_value[tf.newaxis, :], repeats=batch_size, axis=0
             )
-            # Trim to leave room for end token.
-            x = x[..., : sequence_length - len(self.end_value)]
             x = tf.concat([x, end_token_id_tensor], axis=-1)
 
         # Pad to desired length.
-        outputs =
-
+        outputs = pad(
+            x,
+            pad_value=self.pad_value,
+            padding_side=self.padding_side,
             shape=(batch_size, sequence_length),
         )
         outputs = tf.squeeze(outputs, axis=0) if unbatched else outputs
 
         if self.return_padding_mask:
             mask = tf.ones_like(x, dtype="bool")
-
+
+            mask = pad(
+                mask,
+                pad_value=False,
+                padding_side=self.padding_side,
+                shape=(batch_size, sequence_length),
+            )
             mask = tf.squeeze(mask, axis=0) if unbatched else mask
             return outputs, mask
-
         return outputs
 
     def get_config(self):
@@ -195,6 +212,7 @@ class StartEndPacker(PreprocessingLayer):
                 "end_value": self._end_value,
                 "pad_value": self.pad_value,
                 "return_padding_mask": self.return_padding_mask,
+                "padding_side": self.padding_side,
             }
        )
        return config
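Beyond `padding_side`, the `StartEndPacker.call` change above also moves truncation up front: the input is clipped to `sequence_length` minus the space needed for the start and end values before they are concatenated. A small sketch of the left-padded case, with made-up token IDs:

```python
import keras_hub

packer = keras_hub.layers.StartEndPacker(
    sequence_length=6,
    start_value=1,
    end_value=2,
    pad_value=0,
    padding_side="left",
    return_padding_mask=True,
)
outputs, mask = packer([5, 6, 7])
# Expected: outputs == [0, 1, 5, 6, 7, 2] (pad token on the left) and
# mask == [False, True, True, True, True, True].
```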
keras_hub/src/models/backbone.py
CHANGED
@@ -189,23 +189,26 @@ class Backbone(keras.Model):
         saver = get_preset_saver(preset_dir)
         saver.save_backbone(self, max_shard_size=max_shard_size)
 
-    def
-        """Returns list of layer names which are to be LoRA-fied.
-
-        Subclasses can override this method if the names of layers to be
-        LoRa-fied are different.
-        """
+    def default_lora_layer_names(self):
+        """Returns list of layer names which are to be LoRA-fied."""
         return ["query_dense", "value_dense", "query", "value"]
 
-    def enable_lora(self, rank,
+    def enable_lora(self, rank, target_layer_names=None):
         """Enable Lora on the backbone.
 
         Calling this method will freeze all weights on the backbone,
         while enabling Lora on the query & value `EinsumDense` layers
         of the attention layers.
+
+        Args:
+            rank: The rank of the LoRA factorization.
+            target_layer_names: A list of strings, the names of the layers to
+                apply LoRA to. If `None`, this will be populated with the
+                default LoRA layer names as returned by
+                `backbone.default_lora_layer_names()`.
         """
-        if
-
+        if target_layer_names is None:
+            target_layer_names = self.default_lora_layer_names()
         self.trainable = True
         self._lora_enabled_layers = []
         self._lora_rank = rank
@@ -214,7 +217,7 @@ class Backbone(keras.Model):
         all_layers = self._flatten_layers(include_self=False)
         all_layers = [lyr for lyr in all_layers if lyr.weights]
         for i, layer in enumerate(all_layers):
-            for name in
+            for name in target_layer_names:
                 if layer.name == name:
                     if hasattr(layer, "enable_lora"):
                         layer.trainable = True
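The `Backbone` change above renames the override hook to `default_lora_layer_names()` and adds a `target_layer_names` argument to `enable_lora`, so callers can point LoRA at an explicit set of layer names instead of the defaults. A hedged sketch (the preset name is only a placeholder):

```python
import keras_hub

backbone = keras_hub.models.Backbone.from_preset("gemma_2b_en")  # placeholder preset

# Default: uses backbone.default_lora_layer_names(), i.e. the query/value
# projection layers declared by the backbone (plus any subclass additions).
backbone.enable_lora(rank=4)

# Alternatively, restrict LoRA to an explicit list of layer names:
# backbone.enable_lora(rank=4, target_layer_names=["query_dense", "value_dense"])
```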
keras_hub/src/models/gemma3/gemma3_backbone.py
CHANGED
@@ -402,8 +402,8 @@ class Gemma3Backbone(Backbone):
         )
         return config
 
-    def
-        target_names = super().
+    def default_lora_layer_names(self):
+        target_names = super().default_lora_layer_names()
 
         # Add these for `Gemma3VITAttention`.
         if not self.text_only_model:
keras_hub/src/models/pali_gemma/pali_gemma_backbone.py
CHANGED
@@ -274,8 +274,8 @@ class PaliGemmaBackbone(Backbone):
         # Keep the image_sequence_length as a backbone property for easy access.
         self.image_sequence_length = self.vit_encoder.image_sequence_length
 
-    def
-        target_names = super().
+    def default_lora_layer_names(self):
+        target_names = super().default_lora_layer_names()
 
         # Add these for `PaliGemmaVITAttention`.
         target_names += ["query_proj", "value_proj"]
keras_hub/src/models/vit/vit_backbone.py
CHANGED
@@ -18,10 +18,10 @@ class ViTBackbone(Backbone):
 
     Args:
         image_shape: A tuple or list of 3 integers representing the shape of the
-            input image `(height, width, channels)
-
-
-
+            input image `(height, width, channels)`.
+        patch_size: int or (int, int). The size of each image patch, the input
+            image will be divided into patches of shape
+            `(patch_size_h, patch_size_w)`.
         num_layers: int. The number of transformer encoder layers.
         num_heads: int. specifying the number of attention heads in each
             Transformer encoder layer.
@@ -37,6 +37,10 @@ class ViTBackbone(Backbone):
         use_mha_bias: bool. Whether to use bias in the multi-head
             attention layers.
         use_mlp_bias: bool. Whether to use bias in the MLP layers.
+        use_class_token: bool. Whether to use class token to be part of
+            patch embedding. Defaults to `True`.
+        use_patch_bias: bool. Whether to use bias in Conv2d of patch embedding
+            layer. Defaults to `True`.
         data_format: str. `"channels_last"` or `"channels_first"`, specifying
             the data format for the input image. If `None`, defaults to
             `"channels_last"`.
@@ -58,6 +62,8 @@ class ViTBackbone(Backbone):
         layer_norm_epsilon=1e-6,
         use_mha_bias=True,
         use_mlp_bias=True,
+        use_class_token=True,
+        use_patch_bias=True,
         data_format=None,
         dtype=None,
         **kwargs,
@@ -74,24 +80,34 @@ class ViTBackbone(Backbone):
                 f"at index {h_axis} (height) or {w_axis} (width). "
                 f"Image shape: {image_shape}"
             )
-
+
+        if isinstance(patch_size, int):
+            patch_size = (patch_size, patch_size)
+
+        if image_shape[h_axis] % patch_size[0] != 0:
+            raise ValueError(
+                f"Input height {image_shape[h_axis]} should be divisible by "
+                f"patch size {patch_size[0]}."
+            )
+
+        if image_shape[w_axis] % patch_size[1] != 0:
             raise ValueError(
-                f"
-                f"
-                f"indices {h_axis} and {w_axis} respectively. Image shape: "
-                f"{image_shape}"
+                f"Input width {image_shape[h_axis]} should be divisible by "
+                f"patch size {patch_size[1]}."
             )
 
         num_channels = image_shape[channels_axis]
 
         # === Functional Model ===
-        inputs = keras.layers.Input(shape=image_shape)
+        inputs = keras.layers.Input(shape=image_shape, name="images")
 
         x = ViTPatchingAndEmbedding(
-            image_size=image_shape[h_axis],
+            image_size=(image_shape[h_axis], image_shape[w_axis]),
             patch_size=patch_size,
             hidden_dim=hidden_dim,
             num_channels=num_channels,
+            use_class_token=use_class_token,
+            use_patch_bias=use_patch_bias,
             data_format=data_format,
             dtype=dtype,
             name="vit_patching_and_embedding",
@@ -130,6 +146,8 @@ class ViTBackbone(Backbone):
         self.layer_norm_epsilon = layer_norm_epsilon
         self.use_mha_bias = use_mha_bias
         self.use_mlp_bias = use_mlp_bias
+        self.use_class_token = use_class_token
+        self.use_patch_bias = use_patch_bias
         self.data_format = data_format
 
     def get_config(self):
@@ -147,6 +165,8 @@ class ViTBackbone(Backbone):
                 "layer_norm_epsilon": self.layer_norm_epsilon,
                 "use_mha_bias": self.use_mha_bias,
                 "use_mlp_bias": self.use_mlp_bias,
+                "use_class_token": self.use_class_token,
+                "use_patch_bias": self.use_patch_bias,
             }
         )
         return config
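With the `ViTBackbone` changes above, `patch_size` may be an int or an `(int, int)` tuple, each spatial axis only has to be divisible by its own patch dimension (so non-square inputs work), and the class token and patch-embedding bias can be switched off. A small sketch with made-up dimensions:

```python
import numpy as np
import keras_hub

backbone = keras_hub.models.ViTBackbone(
    image_shape=(256, 384, 3),  # non-square: 256 % 16 == 0 and 384 % 16 == 0
    patch_size=16,              # normalized internally to (16, 16)
    num_layers=2,
    num_heads=2,
    hidden_dim=64,
    mlp_dim=128,
    use_class_token=False,      # no prepended class token
    use_patch_bias=False,       # no bias on the patch-embedding Conv2D
)
features = backbone(np.random.rand(1, 256, 384, 3).astype("float32"))
# Without a class token the sequence length is (256 // 16) * (384 // 16) = 384,
# so features should have shape (1, 384, 64).
```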
keras_hub/src/models/vit/vit_image_converter.py
CHANGED
@@ -1,78 +1,8 @@
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
 from keras_hub.src.models.vit.vit_backbone import ViTBackbone
-from keras_hub.src.utils.tensor_utils import preprocessing_function
 
 
 @keras_hub_export("keras_hub.layers.ViTImageConverter")
 class ViTImageConverter(ImageConverter):
-    """Converts images to the format expected by a ViT model.
-
-    This layer performs image normalization using mean and standard deviation
-    values. By default, it uses the same normalization as the
-    "google/vit-large-patch16-224" model on Hugging Face:
-    `norm_mean=[0.5, 0.5, 0.5]` and `norm_std=[0.5, 0.5, 0.5]`
-    ([reference](https://huggingface.co/google/vit-large-patch16-224/blob/main/preprocessor_config.json)).
-    These defaults are suitable for models pretrained using this normalization.
-
-    Args:
-        norm_mean: list or tuple of floats. Mean values for image normalization.
-            Defaults to `[0.5, 0.5, 0.5]`.
-        norm_std: list or tuple of floats. Standard deviation values for
-            image normalization. Defaults to `[0.5, 0.5, 0.5]`.
-        **kwargs: Additional keyword arguments passed to
-            `keras_hub.layers.preprocessing.ImageConverter`.
-
-    Examples:
-    ```python
-    import keras
-    import numpy as np
-    from keras_hub.src.layers import ViTImageConverter
-
-    # Example image (replace with your actual image data)
-    image = np.random.rand(1, 224, 224, 3) # Example: (B, H, W, C)
-
-    # Create a ViTImageConverter instance
-    converter = ViTImageConverter(
-        image_size=(28,28),
-        scale=1/255.
-    )
-    # Preprocess the image
-    preprocessed_image = converter(image)
-    ```
-    """
-
     backbone_cls = ViTBackbone
-
-    def __init__(
-        self, norm_mean=[0.5, 0.5, 0.5], norm_std=[0.5, 0.5, 0.5], **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.norm_mean = norm_mean
-        self.norm_std = norm_std
-
-    @preprocessing_function
-    def call(self, inputs):
-        # TODO: Remove this whole function. Why can just use scale and offset
-        # in the base class.
-        x = super().call(inputs)
-        if self.norm_mean:
-            norm_mean = self._expand_non_channel_dims(self.norm_mean, x)
-            x, norm_mean = self._convert_types(x, norm_mean, self.compute_dtype)
-            x = x - norm_mean
-        if self.norm_std:
-            norm_std = self._expand_non_channel_dims(self.norm_std, x)
-            x, norm_std = self._convert_types(x, norm_std, x.dtype)
-            x = x / norm_std
-
-        return x
-
-    def get_config(self):
-        config = super().get_config()
-        config.update(
-            {
-                "norm_mean": self.norm_mean,
-                "norm_std": self.norm_std,
-            }
-        )
-        return config
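The removed `ViTImageConverter` body duplicated what the base `ImageConverter` already supports; its own TODO pointed at replacing the `norm_mean`/`norm_std` math with the base class's `scale` and `offset`. For the old defaults (mean 0.5, std 0.5 on inputs rescaled from 0-255), `(x / 255 - 0.5) / 0.5` equals `x * (2 / 255) - 1`, which the following hedged sketch expresses directly (assuming the base converter applies scale then offset):

```python
import numpy as np
import keras_hub

# Intended equivalent of the old norm_mean=[0.5, 0.5, 0.5],
# norm_std=[0.5, 0.5, 0.5] behavior on 0-255 inputs, written as a pure
# scale/offset transform.
converter = keras_hub.layers.ViTImageConverter(
    image_size=(224, 224),
    scale=2.0 / 255.0,
    offset=-1.0,
)
images = np.random.randint(0, 256, size=(1, 224, 224, 3)).astype("float32")
preprocessed = converter(images)  # values roughly in [-1, 1]
```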
keras_hub/src/models/vit/vit_layers.py
CHANGED
@@ -75,12 +75,13 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
     """Patches the image and embeds the patches.
 
     Args:
-        image_size: int. Size of the input image
-
-        patch_size: int. Size of each image patch.
+        image_size: (int, int). Size of the input image.
+        patch_size: (int, int). Size of each image patch.
         hidden_dim: int. Dimensionality of the patch embeddings.
         num_channels: int. Number of channels in the input image. Defaults to
             `3`.
+        use_class_token: bool. Whether to use class token to be part of
+            patch embedding. Defaults to `True`.
         data_format: str. `"channels_last"` or `"channels_first"`. Defaults to
             `None` (which uses `"channels_last"`).
         **kwargs: Additional keyword arguments passed to `keras.layers.Layer`
@@ -92,12 +93,15 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         patch_size,
         hidden_dim,
         num_channels=3,
+        use_class_token=True,
+        use_patch_bias=True,
         data_format=None,
         **kwargs,
     ):
         super().__init__(**kwargs)
-
-
+        grid_size = tuple([s // p for s, p in zip(image_size, patch_size)])
+        num_patches = grid_size[0] * grid_size[1]
+        num_positions = num_patches + 1 if use_class_token else num_patches
 
         # === Config ===
         self.image_size = image_size
@@ -106,19 +110,22 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         self.num_channels = num_channels
         self.num_patches = num_patches
         self.num_positions = num_positions
+        self.use_class_token = use_class_token
+        self.use_patch_bias = use_patch_bias
         self.data_format = standardize_data_format(data_format)
 
     def build(self, input_shape):
-
-
-
-
-
-
-
-
-
-
+        if self.use_class_token:
+            self.class_token = self.add_weight(
+                shape=(
+                    1,
+                    1,
+                    self.hidden_dim,
+                ),
+                initializer="random_normal",
+                dtype=self.variable_dtype,
+                name="class_token",
+            )
         self.patch_embedding = keras.layers.Conv2D(
             filters=self.hidden_dim,
             kernel_size=self.patch_size,
@@ -127,6 +134,7 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
             activation=None,
             dtype=self.dtype_policy,
             data_format=self.data_format,
+            use_bias=self.use_patch_bias,
             name="patch_embedding",
         )
         self.patch_embedding.build(input_shape)
@@ -153,10 +161,16 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
         patch_embeddings = ops.reshape(
             patch_embeddings, [embeddings_shape[0], -1, embeddings_shape[-1]]
         )
-        class_token = ops.tile(self.class_token, (embeddings_shape[0], 1, 1))
         position_embeddings = self.position_embedding(self.position_ids)
-
-
+
+        if self.use_class_token:
+            class_token = ops.tile(
+                self.class_token, (embeddings_shape[0], 1, 1)
+            )
+            patch_embeddings = ops.concatenate(
+                [class_token, patch_embeddings], axis=1
+            )
+        return ops.add(patch_embeddings, position_embeddings)
 
     def compute_output_shape(self, input_shape):
         return (
@@ -175,6 +189,7 @@ class ViTPatchingAndEmbedding(keras.layers.Layer):
                 "num_channels": self.num_channels,
                 "num_patches": self.num_patches,
                 "num_positions": self.num_positions,
+                "use_class_token": self.use_class_token,
             }
         )
         return config
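The constructor arithmetic added to `ViTPatchingAndEmbedding` above is plain integer division per axis; for example, a 224×224 input with 16×16 patches gives a 14×14 grid, 196 patches, and 197 positions when the class token is kept. The same computation, stated standalone:

```python
# Mirrors the grid/patch/position arithmetic from the diff above.
image_size = (224, 224)
patch_size = (16, 16)
use_class_token = True

grid_size = tuple(s // p for s, p in zip(image_size, patch_size))    # (14, 14)
num_patches = grid_size[0] * grid_size[1]                            # 196
num_positions = num_patches + 1 if use_class_token else num_patches  # 197
print(grid_size, num_patches, num_positions)
```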
keras_hub/src/models/vit/vit_presets.py
CHANGED
@@ -11,7 +11,7 @@ backbone_presets = {
             "params": 85798656,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet/3",
     },
     "vit_base_patch16_384_imagenet": {
         "metadata": {
@@ -22,7 +22,7 @@ backbone_presets = {
             "params": 86090496,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_384_imagenet/3",
     },
     "vit_large_patch16_224_imagenet": {
         "metadata": {
@@ -33,7 +33,7 @@ backbone_presets = {
             "params": 303301632,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet/3",
     },
     "vit_large_patch16_384_imagenet": {
         "metadata": {
@@ -44,7 +44,7 @@ backbone_presets = {
             "params": 303690752,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_384_imagenet/3",
     },
     "vit_base_patch32_384_imagenet": {
         "metadata": {
@@ -55,7 +55,7 @@ backbone_presets = {
             "params": 87528192,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_384_imagenet/2",
     },
     "vit_large_patch32_384_imagenet": {
         "metadata": {
@@ -66,7 +66,7 @@ backbone_presets = {
             "params": 305607680,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_384_imagenet/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_384_imagenet/2",
     },
     "vit_base_patch16_224_imagenet21k": {
         "metadata": {
@@ -77,7 +77,7 @@ backbone_presets = {
             "params": 85798656,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch16_224_imagenet21k/2",
     },
     "vit_base_patch32_224_imagenet21k": {
         "metadata": {
@@ -88,7 +88,7 @@ backbone_presets = {
             "params": 87455232,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_base_patch32_224_imagenet21k/2",
     },
     "vit_huge_patch14_224_imagenet21k": {
         "metadata": {
@@ -99,7 +99,7 @@ backbone_presets = {
             "params": 630764800,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_huge_patch14_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_huge_patch14_224_imagenet21k/2",
     },
     "vit_large_patch16_224_imagenet21k": {
         "metadata": {
@@ -110,7 +110,7 @@ backbone_presets = {
             "params": 303301632,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch16_224_imagenet21k/2",
     },
     "vit_large_patch32_224_imagenet21k": {
         "metadata": {
@@ -121,6 +121,6 @@ backbone_presets = {
             "params": 305510400,
             "path": "vit",
         },
-        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_224_imagenet21k/
+        "kaggle_handle": "kaggle://keras/vit/keras/vit_large_patch32_224_imagenet21k/2",
     },
 }
keras_hub/src/utils/tensor_utils.py
CHANGED
@@ -21,6 +21,20 @@ except ImportError:
 NO_CONVERT_COUNTER = threading.local()
 
 
+def pad(x, shape, padding_side, pad_value):
+    if padding_side == "left":
+        x = x[..., ::-1]
+
+    outputs = x.to_tensor(
+        default_value=pad_value,
+        shape=shape,
+    )
+
+    if padding_side == "left":
+        outputs = outputs[..., ::-1]
+    return outputs
+
+
 @contextlib.contextmanager
 def no_convert_scope():
     try:
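The new `pad` helper above densifies a ragged batch with `to_tensor`; for left padding it reverses the ragged values first and reverses the dense result back, so the pad values land at the front. It lives under `keras_hub/src/utils`, so treat the import below as private API; a small sketch:

```python
import tensorflow as tf
from keras_hub.src.utils.tensor_utils import pad

x = tf.ragged.constant([[1, 2, 3], [4, 5]])

right = pad(x, shape=(2, 5), padding_side="right", pad_value=0)
# [[1, 2, 3, 0, 0],
#  [4, 5, 0, 0, 0]]

left = pad(x, shape=(2, 5), padding_side="left", pad_value=0)
# [[0, 0, 1, 2, 3],
#  [0, 0, 0, 4, 5]]
```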
keras_hub/src/version.py
CHANGED
{keras_hub_nightly-0.21.0.dev202505270408.dist-info → keras_hub_nightly-0.22.0.dev202505290412.dist-info}/RECORD
CHANGED
@@ -5,7 +5,7 @@ keras_hub/models/__init__.py,sha256=itSzodVUeuX6HQnmsSXY0Wv-5Htbu397410R-SFW_4I,
 keras_hub/samplers/__init__.py,sha256=aFQIkiqbZpi8vjrPp2MVII4QUfE-eQjra5fMeHsoy7k,886
 keras_hub/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/api_export.py,sha256=9pQZK27JObxWZ96QPLBp1OBsjWigh1iuV6RglPGMRk0,1499
-keras_hub/src/version.py,sha256=
+keras_hub/src/version.py,sha256=DDvaRSyKJcjRMYdIJIroiLLIbnEZPXF5mlsR_VQNowQ,222
 keras_hub/src/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/layers/modeling/alibi_bias.py,sha256=1XBTHI52L_iJDhN_w5ydu_iMhCuTgQAxEPwcLA6BPuk,4411
@@ -28,11 +28,11 @@ keras_hub/src/layers/preprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
 keras_hub/src/layers/preprocessing/audio_converter.py,sha256=YGh_kQw65a1Z6S5zzSNVP-ChyLYHq3-eOYpOS53xIN8,4156
 keras_hub/src/layers/preprocessing/image_converter.py,sha256=p2CoSV_zfHIVZqLo1hQk2BdOL_RtBlr5wUtgpAmtwwY,15926
 keras_hub/src/layers/preprocessing/masked_lm_mask_generator.py,sha256=itxWq3FHYlR0I7jKarQlSKbSmRLl9ut_UTSP3ZDwP0A,8162
-keras_hub/src/layers/preprocessing/multi_segment_packer.py,sha256=
+keras_hub/src/layers/preprocessing/multi_segment_packer.py,sha256=APP62tF9Tw4zah7oL5maSYRXMwcR4RwicZMhQq2wRxY,12509
 keras_hub/src/layers/preprocessing/preprocessing_layer.py,sha256=WyX41b9Ev_YJ5uVQVOAqD0PQasMOPDoyDjl_PkzkAkE,687
 keras_hub/src/layers/preprocessing/random_deletion.py,sha256=_EmBt4d8TTPLF3OQhA8HoBmej-BX_BocbjeW6jzi6Wo,9768
 keras_hub/src/layers/preprocessing/random_swap.py,sha256=cV7HqMwu_JHTbhe9UMVAsZdOTLsukyZDteEBYp0idiM,9509
-keras_hub/src/layers/preprocessing/start_end_packer.py,sha256=
+keras_hub/src/layers/preprocessing/start_end_packer.py,sha256=F_yCyI6yyxAfunb37C0AzFX3lKjaZg08HMjUXOpjgwc,8642
 keras_hub/src/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/metrics/bleu.py,sha256=pnid5azpAxO6vKEfUtAby3nH29OGbwYKgVGOGeoaA3I,13694
 keras_hub/src/metrics/edit_distance.py,sha256=kjhe8uNjvv8aN49RyrKAbNi7a8_OlB8fMza0J_CfNQg,6353
@@ -43,7 +43,7 @@ keras_hub/src/metrics/rouge_n.py,sha256=JoFtmgjF4Ic263ny6bfD6vMHKreH9le3HnOOxemu
 keras_hub/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/models/audio_to_text.py,sha256=XoOjXtKBX6K1fz-zOXcdVo3FpjuxCMnJZh2LQcYXb_0,2726
 keras_hub/src/models/audio_to_text_preprocessor.py,sha256=GS-WWyJ6aSsPRxi_0bxvxA00h2mT2FEwSdAoQXAUYVI,3249
-keras_hub/src/models/backbone.py,sha256=
+keras_hub/src/models/backbone.py,sha256=utZP09_u5FpMGiq8jl3W98TCW8CysndwLw2VCs3BHz8,11780
 keras_hub/src/models/causal_lm.py,sha256=ReaF-i3SHsCkHh4c28jM72QjMQ8x7yiCwG39FRb-7KE,16786
 keras_hub/src/models/causal_lm_preprocessor.py,sha256=YY7VJZicdmnjDSWi9g4_pEpd5bdJK166GlWcapvokF0,6663
 keras_hub/src/models/feature_pyramid_backbone.py,sha256=clEW-TTQSVJ_5qFNdDF0iABkin1p_xlBUFjJrC7T0IA,2247
@@ -196,7 +196,7 @@ keras_hub/src/models/gemma/gemma_tokenizer.py,sha256=FhcyNL4lo63MqOhTQPFr07-u3Bd
 keras_hub/src/models/gemma/rms_normalization.py,sha256=fku-JEo2sNy-ytX7ySD1sRzdhRAPmYex_z8oFk1NiG8,833
 keras_hub/src/models/gemma3/__init__.py,sha256=oPFadkdK5DRLD6sYx83iTetY5daWuSzmJilLjokHcbU,257
 keras_hub/src/models/gemma3/gemma3_attention.py,sha256=VstFCTVsplcDNSgnyBcSpLgKn-pktJ39D5Ri-Bb7BQA,13628
-keras_hub/src/models/gemma3/gemma3_backbone.py,sha256=
+keras_hub/src/models/gemma3/gemma3_backbone.py,sha256=CaVUQAKrBd1b_7gF7dyTWLjJebzzMd24_3oUipVu5gE,16445
 keras_hub/src/models/gemma3/gemma3_causal_lm.py,sha256=U3C9TWlIz8VefAxQ0wJ6bDz18wqHBie8B26Ub_nFZs4,13843
 keras_hub/src/models/gemma3/gemma3_causal_lm_preprocessor.py,sha256=vjt4N-zr0Eb5kvkOR-WUgskDTNe64L_6tYnhyNb6xaE,29601
 keras_hub/src/models/gemma3/gemma3_decoder_block.py,sha256=6PLlpDxxF67stDv74fw9nNgUHBWmTLx6qGygJwyu5FY,10819
@@ -286,7 +286,7 @@ keras_hub/src/models/opt/opt_causal_lm_preprocessor.py,sha256=xHfslVMOZlAIj2V2jI
 keras_hub/src/models/opt/opt_presets.py,sha256=LrjgI5gbq4Cvfl_pmeCnKn4hS_V_0GYTeJaDc9tbeZM,1745
 keras_hub/src/models/opt/opt_tokenizer.py,sha256=oDHeed4xf07tm14hj_C78BkzMuuRwRP2cRHmqYnObrs,2557
 keras_hub/src/models/pali_gemma/__init__.py,sha256=uODWTlttOOchcTLpiYHCEWMXnDxIz8ZVIeYFQN2bd8o,288
-keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=
+keras_hub/src/models/pali_gemma/pali_gemma_backbone.py,sha256=e1KAg4bmK1PrmYW-Ewx3vD7S2DlX9K8LmbRwv30VEkA,13643
 keras_hub/src/models/pali_gemma/pali_gemma_causal_lm.py,sha256=AViEs6YltUqWnIVo7J02JkXcanBgLSdwZwF56TVr8gc,11345
 keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_preprocessor.py,sha256=F57y0fZ0wYYxfGIjfrJc1W9uQpViYFx5bvFjj5CqUbI,4814
 keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py,sha256=24ABQ1vGlppV-KfWh0YqJjzM_Lu2GIwvyJ4X2XXie_A,5616
@@ -409,12 +409,12 @@ keras_hub/src/models/vgg/vgg_image_classifier_preprocessor.py,sha256=M7hBbDPws5Z
 keras_hub/src/models/vgg/vgg_image_converter.py,sha256=FKVrSNNBxIkiKvApzf4TZxidBb1z917Xs9nooHCcRLM,324
 keras_hub/src/models/vgg/vgg_presets.py,sha256=UL7a8hdZ22duMADXwVypGnc20ME-ywI4QjtXu15usEI,1491
 keras_hub/src/models/vit/__init__.py,sha256=GH7x3VjEXZLm-4F-c9-55QZE0lP2OLVICH0Hr5YCp9A,239
-keras_hub/src/models/vit/vit_backbone.py,sha256=
+keras_hub/src/models/vit/vit_backbone.py,sha256=VnypiTAf0ORaBTVzdDOXsnKnQxKbrIlX9z9qOumZH50,6699
 keras_hub/src/models/vit/vit_image_classifier.py,sha256=lMVxiD1_6drx7XQ7P7YzlqnFP7kT1zlMe84f-T3SDQI,6332
 keras_hub/src/models/vit/vit_image_classifier_preprocessor.py,sha256=wu6YcBlXMWB9sKCPvmNdGBZKTLQt_HyHWS6P9nyDwsk,504
-keras_hub/src/models/vit/vit_image_converter.py,sha256=
-keras_hub/src/models/vit/vit_layers.py,sha256=
-keras_hub/src/models/vit/vit_presets.py,sha256=
+keras_hub/src/models/vit/vit_image_converter.py,sha256=JhdXcbfKu9pKSJZiaKk7FKf_CjSXztSa2rsBFQvlgAo,324
+keras_hub/src/models/vit/vit_layers.py,sha256=c0ApxF7cMqeEEa0LcWrBhc6zIolwOFVb2HjzLV-q98k,13940
+keras_hub/src/models/vit/vit_presets.py,sha256=mlLBJxxonru14fBiMnMF4ud-JgbJHclpVV3FsoIubrk,4479
 keras_hub/src/models/vit_det/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/models/vit_det/vit_det_backbone.py,sha256=DOZ5J7c1t5PAZ6y0pMmBoQTMOUup7UoUrYVfCs69ltY,7697
 keras_hub/src/models/vit_det/vit_layers.py,sha256=mnwu56chMc6zxmfp_hsLdR7TXYy1_YsWy1KwGX9M5Ic,19840
@@ -471,7 +471,7 @@ keras_hub/src/utils/keras_utils.py,sha256=2qrh4F-rqceVFSx0-cbsFBfWae5hBXFb_sEtPP
 keras_hub/src/utils/pipeline_model.py,sha256=jgzB6NQPSl0KOu08N-TazfOnXnUJbZjH2EXXhx25Ftg,9084
 keras_hub/src/utils/preset_utils.py,sha256=fx0gNqOTdvW-ZdP0Y3ZaCGE7frYBhwi3lG_GO0swG4w,34602
 keras_hub/src/utils/python_utils.py,sha256=N8nWeO3san4YnGkffRXG3Ix7VEIMTKSN21FX5TuL7G8,202
-keras_hub/src/utils/tensor_utils.py,sha256=
+keras_hub/src/utils/tensor_utils.py,sha256=WrohV6-hvxtLE6rRRhtN4hy8GkHikV-NrRnVEYUwJQo,16133
 keras_hub/src/utils/coco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 keras_hub/src/utils/coco/coco_utils.py,sha256=x_QnUUvZ92zoFzMJugiInHORc4NrMdWVBkpp8BAYF6s,2586
 keras_hub/src/utils/imagenet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -502,7 +502,7 @@ keras_hub/src/utils/transformers/preset_loader.py,sha256=1nfS5xVsl-JROGXJXltTqV1
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=CYUHyA4y-B61r7NDnCsFb4t_UmSwZ1k9L-8gzEd6KRg,3339
 keras_hub/tokenizers/__init__.py,sha256=uMjjm0mzUkRb0e4Ac_JK8aJ9cKGUi5UqmzWoWAFJprE,4164
 keras_hub/utils/__init__.py,sha256=jXPqVGBpJr_PpYmqD8aDG-fRMlxH-ulqCR2SZMn288Y,646
-keras_hub_nightly-0.
-keras_hub_nightly-0.
-keras_hub_nightly-0.
-keras_hub_nightly-0.
+keras_hub_nightly-0.22.0.dev202505290412.dist-info/METADATA,sha256=W4vT73-ho1j4QwQv59qS5xF4i6bWH5k7tHiUJ7-_y4k,7393
+keras_hub_nightly-0.22.0.dev202505290412.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+keras_hub_nightly-0.22.0.dev202505290412.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.22.0.dev202505290412.dist-info/RECORD,,