keras-hub-nightly 0.16.1.dev202409230338-py3-none-any.whl → 0.16.1.dev202409250340-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +2 -0
- keras_hub/api/models/__init__.py +3 -0
- keras_hub/src/models/image_segmenter.py +86 -0
- keras_hub/src/models/sam/__init__.py +13 -0
- keras_hub/src/models/sam/sam_backbone.py +153 -0
- keras_hub/src/models/sam/sam_image_segmenter.py +237 -0
- keras_hub/src/models/sam/sam_layers.py +402 -0
- keras_hub/src/models/sam/sam_mask_decoder.py +270 -0
- keras_hub/src/models/sam/sam_prompt_encoder.py +336 -0
- keras_hub/src/models/sam/sam_transformer.py +159 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +17 -12
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/RECORD +16 -8
- {keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/top_level.txt +0 -0
keras_hub/src/models/sam/sam_prompt_encoder.py ADDED
@@ -0,0 +1,336 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import keras
+from keras import ops
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.sam.sam_layers import (
+    RandomFrequencyPositionalEmbeddings,
+)
+
+
+@keras_hub_export("keras_hub.layers.SAMPromptEncoder")
+class SAMPromptEncoder(keras.layers.Layer):
+    """Prompt Encoder for the Segment Anything Model (SAM).
+
+    The prompt encoder generates encodings for three types of prompts:
+    - Point prompts: Points on the image along with a label indicating
+      whether the point is in the foreground (part of the mask) or in the
+      background (not a part of the mask).
+    - Box prompts: A batch of bounding boxes with format [(x1, y1), (x2, y2)]
+      used to determine the location of the masks in the image.
+    - Masks: An input mask can be passed to refine the positional embeddings
+      for the output mask.
+
+    First, the point prompts and box prompts are concatenated and positional
+    encodings are generated using random spatial frequencies. A point is
+    represented as the sum of a positional encoding of the point's location
+    and one of two learned embeddings that indicate if the point is either in
+    the foreground or background. A box is represented by an embedding pair:
+    (1) the positional encoding of its top-left corner summed with a learned
+    embedding representing "top-left corner" and
+    (2) the same structure but using a learned embedding indicating
+    "bottom-right corner".
+    The box and point encodings are referred to as "prompt_sparse encodings".
+    If a mask prompt is passed, a convolutional neural net is used to
+    downscale it to generate "dense encodings". If no mask prompt is passed,
+    an embedding layer is used instead to generate a "no mask" embedding.
+
+
+    Args:
+        hidden_size: int, optional. The number of features in the output
+            embeddings. Defaults to `256`.
+        image_embedding_size: int, optional. The number of features in the
+            image embeddings generated by an image encoder. Defaults to
+            `(64, 64)`.
+        input_image_size: tuple[int], optional. A tuple of the height and
+            width of the image being prompted. Defaults to `(1024, 1024)`.
+        mask_in_channels: int, optional. The number of channels of the mask
+            prompt. Defaults to `16`.
+        activation: str, optional. The activation to use in the mask
+            downscaler neural net. Defaults to `"gelu"`.
+    """
+
+    def __init__(
+        self,
+        *,
+        hidden_size=256,
+        image_embedding_size=(64, 64),
+        input_image_size=(1024, 1024),
+        mask_in_channels=16,
+        activation="gelu",
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.hidden_size = hidden_size
+        self.image_embedding_size = image_embedding_size
+        self.input_image_size = input_image_size
+        self.mask_in_channels = mask_in_channels
+        self.activation = activation
+
+        self.positional_embedding_layer = RandomFrequencyPositionalEmbeddings(
+            num_positional_features=self.hidden_size // 2, scale=1
+        )
+
+        self.foreground_point_embed = keras.layers.Embedding(
+            1, hidden_size, name="foreground_point_embed"
+        )
+        self.background_point_embed = keras.layers.Embedding(
+            1, hidden_size, name="background_point_embed"
+        )
+        self.top_left_corner_embed = keras.layers.Embedding(
+            1, hidden_size, name="top_left_corner_embed"
+        )
+        self.bottom_right_corner_embed = keras.layers.Embedding(
+            1, hidden_size, name="bottom_right_corner_embed"
+        )
+        self.not_a_point_embed = keras.layers.Embedding(
+            1, hidden_size, name="not_a_point_embed"
+        )
+
+        self.mask_downscaler = keras.models.Sequential(
+            [
+                keras.layers.Conv2D(
+                    mask_in_channels // 4, kernel_size=2, strides=2
+                ),
+                keras.layers.LayerNormalization(epsilon=1e-6),
+                keras.layers.Activation(activation),
+                keras.layers.Conv2D(mask_in_channels, kernel_size=2, strides=2),
+                keras.layers.LayerNormalization(epsilon=1e-6),
+                keras.layers.Activation(activation),
+                keras.layers.Conv2D(hidden_size, kernel_size=1),
+            ],
+            name="mask_downscaler",
+        )
+        self.no_mask_embed = keras.layers.Embedding(
+            1, hidden_size, name="no_mask_embed"
+        )
+
+    def build(
+        self,
+        points_shape=None,
+        labels_shape=None,
+        boxes_shape=None,
+        masks_shape=None,
+    ):
+        self.positional_embedding_layer.build()
+        for layer in [
+            self.foreground_point_embed,
+            self.background_point_embed,
+            self.top_left_corner_embed,
+            self.bottom_right_corner_embed,
+            self.not_a_point_embed,
+            self.no_mask_embed,
+        ]:
+            layer.build([None])
+        self.mask_downscaler.build(
+            [
+                None,
+                4 * self.image_embedding_size[0],
+                4 * self.image_embedding_size[1],
+                1,
+            ]
+        )
+        self.built = True
+
+    def compute_output_shape(
+        self,
+        points_shape=None,
+        labels_shape=None,
+        boxes_shape=None,
+        masks_shape=None,
+    ):
+        batch_size = None
+        for shape in (points_shape, labels_shape, boxes_shape, masks_shape):
+            if shape is not None:
+                batch_size = shape[0]
+                break
+        return {
+            "prompt_sparse_embeddings": (
+                batch_size,
+                None,
+                self.hidden_size,
+            ),
+            "prompt_dense_embeddings": (
+                batch_size,
+                self.image_embedding_size[0],
+                self.image_embedding_size[1],
+                self.hidden_size,
+            ),
+            "prompt_dense_positional_embeddings": (
+                batch_size,
+                self.image_embedding_size[0],
+                self.image_embedding_size[1],
+                self.hidden_size,
+            ),
+        }
+
+    def _embed_points(self, points, labels):
+        points = points + 0.5
+        indices = ops.arange(1, dtype="int32")
+
+        point_embeddings = self.positional_embedding_layer.encode_coordinates(
+            points, self.input_image_size
+        )
+        labels = ops.broadcast_to(
+            labels[..., None], ops.shape(point_embeddings)
+        )
+        point_embeddings = ops.where(
+            labels == 0,
+            point_embeddings + self.background_point_embed(indices),
+            point_embeddings + self.foreground_point_embed(indices),
+        )
+        point_embeddings = ops.where(
+            labels == -1,
+            self.not_a_point_embed(indices),
+            point_embeddings,
+        )
+        return point_embeddings
+
+    def _embed_box(self, box):
+        shape = ops.shape(box)
+        batch_size, N = shape[0], shape[1]
+        box = box + 0.5
+        indices = ops.arange(1, dtype="int32")
+        corner_embedding = self.positional_embedding_layer.encode_coordinates(
+            box, self.input_image_size
+        )
+        top_left_embedding = corner_embedding[
+            :, :, 0, :
+        ] + self.top_left_corner_embed(indices)
+        bottom_right_embedding = corner_embedding[
+            :, :, 1, :
+        ] + self.bottom_right_corner_embed(indices)
+        corner_embedding = ops.stack(
+            [top_left_embedding, bottom_right_embedding], axis=2
+        )
+        return ops.reshape(
+            corner_embedding, (batch_size, N * 2, self.hidden_size)
+        )
+
+    def _embed_mask(self, mask):
+        mask_embedding = self.mask_downscaler(mask)
+        return mask_embedding
+
+    def call(
+        self, images=None, points=None, labels=None, boxes=None, masks=None
+    ):
+        # Get the batch shape based on any arbitrary input, because batch
+        # shapes must all match.
+        valid_inputs = [
+            x for x in (points, labels, boxes, masks) if x is not None
+        ]
+
+        batch_size = ops.shape(valid_inputs[0])[0]
+        if points is None:
+            points = ops.zeros((batch_size, 0, 2))
+        if labels is None:
+            labels = ops.zeros((batch_size, 0))
+        if boxes is None:
+            boxes = ops.zeros((batch_size, 0, 2, 2))
+        if masks is None:
+            masks = ops.zeros((batch_size, 0, 256, 256, 1))
+
+        # Compute point embeddings
+        point_embeddings = self._embed_points(points, labels)
+
+        # Compute box embeddings
+        box_embeddings = self._embed_box(boxes)
+
+        # Concatenate both into a sparse embeddings tensor
+        sparse_embeddings = ops.concatenate(
+            [point_embeddings, box_embeddings], axis=1
+        )
+
+        # Compute the mask embeddings
+        def _no_mask_embed():
+            reshaped_embed = ops.reshape(
+                self.no_mask_embed(ops.arange(1, dtype="int32")),
+                (1, 1, 1, self.hidden_size),
+            )
+            broadcasted_embed = ops.broadcast_to(
+                reshaped_embed,
+                shape=(
+                    batch_size,
+                    self.image_embedding_size[0],
+                    self.image_embedding_size[1],
+                    self.hidden_size,
+                ),
+            )
+            return broadcasted_embed
+
+        def _maybe_input_mask_embed():
+            # Keras passes the masks as concrete tensors for both the
+            # true and false functions to build the output shape. So, we
+            # need to handle the case when 0 size masks is passed and
+            # dispatch the call to `_no_mask_embed`. Note that we can't call
+            # the lambda directly since the inputs are bound to different
+            # values when called with concrete values.
+            if masks.shape[1] == 0:
+                return ops.broadcast_to(
+                    ops.reshape(
+                        self.no_mask_embed(ops.arange(1, dtype="int32")),
+                        (1, 1, 1, self.hidden_size),
+                    ),
+                    shape=(
+                        batch_size,
+                        self.image_embedding_size[0],
+                        self.image_embedding_size[1],
+                        self.hidden_size,
+                    ),
+                )
+            shape = ops.shape(masks)
+            BM, N, height, width, channels = (
+                shape[0],
+                shape[1],
+                shape[2],
+                shape[3],
+                shape[4],
+            )
+            return self._embed_mask(
+                ops.reshape(masks, (BM * N, height, width, channels))
+            )
+
+        dense_embeddings = ops.cond(
+            ops.equal(ops.size(masks), 0),
+            _no_mask_embed,
+            _maybe_input_mask_embed,
+        )
+
+        # Compute the dense positional embeddings
+        prompt_dense_positional_embeddings = (
+            self.positional_embedding_layer.encode_image(
+                self.image_embedding_size
+            )[None, ...]
+        )
+
+        return {
+            "prompt_sparse_embeddings": sparse_embeddings,
+            "prompt_dense_embeddings": dense_embeddings,
+            "prompt_dense_positional_embeddings": prompt_dense_positional_embeddings,
+        }
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "hidden_size": self.hidden_size,
+                "image_embedding_size": self.image_embedding_size,
+                "input_image_size": self.input_image_size,
+                "mask_in_channels": self.mask_in_channels,
+                "activation": self.activation,
+            }
+        )
+        return config
keras_hub/src/models/sam/sam_transformer.py ADDED
@@ -0,0 +1,159 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import keras
+from keras import ops
+
+from keras_hub.src.models.sam.sam_layers import (
+    MultiHeadAttentionWithDownsampling,
+)
+from keras_hub.src.models.sam.sam_layers import TwoWayMultiHeadAttention
+
+
+class TwoWayTransformer(keras.layers.Layer):
+    """A two-way cross-attention transformer decoder.
+
+    A transformer decoder that attends to an input image using queries whose
+    positional embedding is supplied.
+    The transformer decoder design is shown in
+    [1](https://arxiv.org/abs/2304.02643).
+    Each decoder layer performs 4 steps:
+    (1) self-attention on the tokens,
+    (2) cross-attention from tokens (as queries) to the image embedding,
+    (3) a point-wise MLP updates each token, and
+    (4) cross-attention from the image embedding (as queries) to tokens.
+    This last step updates the image embedding with prompt information. Each
+    self/cross-attention and MLP has a residual connection and layer
+    normalization.
+    To ensure the decoder has access to critical geometric information, the
+    positional encodings are added to the image embedding whenever they
+    participate in an attention layer. Additionally, the entire original
+    prompt tokens (including their positional encodings) are re-added to the
+    updated tokens whenever they participate in an attention layer. This
+    allows for a strong dependence on both the prompt token's geometric
+    location and type.
+
+    Args:
+        num_layers: int, optional. The number of attention blocks to use.
+            Defaults to `2`.
+        hidden_size: int, optional. The number of features of the input image
+            and point embeddings. Defaults to `256`.
+        num_heads: int, optional. Number of heads to use in the attention
+            layers. Defaults to `8`.
+        intermediate_dim: int, optional. The number of units in the hidden
+            layer of the MLP block used in the attention layers. Defaults to
+            `2048`.
+        activation: str, optional. The activation of the MLP block's output
+            layer used in the attention layers. Defaults to `"relu"`.
+        attention_downsample_rate: int, optional. The downsample rate of the
+            attention layers. Defaults to `2`.
+    """
+
+    def __init__(
+        self,
+        *,
+        num_layers=2,
+        hidden_size=256,
+        num_heads=8,
+        intermediate_dim=2048,
+        activation="relu",
+        attention_downsample_rate=2,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.num_heads = num_heads
+        self.intermediate_dim = intermediate_dim
+        self.activation = activation
+        self.attention_downsample_rate = attention_downsample_rate
+        self.layers = []
+        for i in range(num_layers):
+            self.layers.append(
+                TwoWayMultiHeadAttention(
+                    num_heads=num_heads,
+                    key_dim=hidden_size // num_heads,
+                    intermediate_dim=intermediate_dim,
+                    skip_first_layer_pos_embedding=(i == 0),
+                    attention_downsample_rate=attention_downsample_rate,
+                    activation=activation,
+                    dtype=self.dtype_policy,
+                )
+            )
+        self.final_attention_token_to_image = (
+            MultiHeadAttentionWithDownsampling(
+                num_heads=num_heads,
+                key_dim=hidden_size // num_heads,
+                downsample_rate=attention_downsample_rate,
+                dtype=self.dtype_policy,
+            )
+        )
+        self.final_layer_norm = keras.layers.LayerNormalization(
+            epsilon=1e-5, dtype=self.dtype_policy
+        )
+
+    def build(self, input_shape=None):
+        for layer in self.layers:
+            layer.build()
+        self.final_attention_token_to_image.build()
+        self.final_layer_norm.build([None, None, self.hidden_size])
+        self.built = True
+
+    def call(
+        self, image_embedding, image_positional_embeddings, point_embedding
+    ):
+        shape = ops.shape(image_embedding)
+        B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+        image_embedding = ops.reshape(image_embedding, (B, H * W, C))
+
+        shape = ops.shape(image_positional_embeddings)
+        B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+        image_positional_embeddings = ops.reshape(
+            image_positional_embeddings, (B, H * W, C)
+        )
+        queries = point_embedding
+        keys = image_embedding
+
+        for layer in self.layers:
+            queries, keys = layer(
+                queries=queries,
+                keys=keys,
+                query_pos_embedding=point_embedding,
+                key_pos_embedding=image_positional_embeddings,
+            )
+
+        queries_with_pos_embedding = queries + point_embedding
+        keys_with_pos_embedding = keys + image_positional_embeddings
+        attention_map = self.final_attention_token_to_image(
+            query=queries_with_pos_embedding,
+            key=keys_with_pos_embedding,
+            value=keys,
+        )
+        queries = queries + attention_map
+        queries = self.final_layer_norm(queries)
+
+        return queries, keys
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_layers": self.num_layers,
+                "hidden_size": self.hidden_size,
+                "num_heads": self.num_heads,
+                "intermediate_dim": self.intermediate_dim,
+                "activation": self.activation,
+                "attention_downsample_rate": self.attention_downsample_rate,
+            }
+        )
+        return config
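
`TwoWayTransformer` is not exported publicly; it is driven internally by the SAM mask decoder. A shape-level sketch of its contract, illustrative only, using the internal module path from the file list above:

import numpy as np
from keras_hub.src.models.sam.sam_transformer import TwoWayTransformer

transformer = TwoWayTransformer()  # num_layers=2, hidden_size=256, num_heads=8

# Image embedding, its positional encoding, and a handful of prompt tokens.
image_embedding = np.zeros((1, 64, 64, 256), dtype="float32")
image_positional_embeddings = np.zeros((1, 64, 64, 256), dtype="float32")
point_embedding = np.zeros((1, 7, 256), dtype="float32")

queries, keys = transformer(
    image_embedding, image_positional_embeddings, point_embedding
)
print(queries.shape)  # (1, 7, 256): updated prompt tokens
print(keys.shape)     # (1, 4096, 256): updated, flattened image tokens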
keras_hub/src/models/vit_det/vit_det_backbone.py CHANGED
@@ -104,7 +104,7 @@ class ViTDetBackbone(Backbone):
         **kwargs
     ):
         # === Functional model ===
-        img_input = keras.layers.Input(shape=image_shape)
+        img_input = keras.layers.Input(shape=image_shape, name="images")
         # Check that the input image is well specified.
         if img_input.shape[-3] is None or img_input.shape[-2] is None:
             raise ValueError(
@@ -144,17 +144,22 @@ class ViTDetBackbone(Backbone):
             ),
             input_size=(img_size // patch_size, img_size // patch_size),
         )(x)
-
-
-
-
-
-
-
-
-
-
-
+        self.neck = keras.models.Sequential(
+            [
+                keras.layers.Conv2D(
+                    filters=num_output_channels, kernel_size=1, use_bias=False
+                ),
+                keras.layers.LayerNormalization(epsilon=1e-6),
+                keras.layers.Conv2D(
+                    filters=num_output_channels,
+                    kernel_size=3,
+                    padding="same",
+                    use_bias=False,
+                ),
+                keras.layers.LayerNormalization(epsilon=1e-6),
+            ]
+        )
+        x = self.neck(x)
 
         super().__init__(inputs=img_input, outputs=x, **kwargs)
 
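One practical effect of the first hunk: the functional input now carries the explicit name "images", so a built backbone accepts dict inputs keyed by that name as well as plain tensors (standard Keras functional-model behavior). A minimal sketch, assuming `backbone` is an already-constructed `ViTDetBackbone` with a (1024, 1024, 3) `image_shape`:

import numpy as np

# `backbone` is assumed to be a constructed ViTDetBackbone instance.
images = np.zeros((1, 1024, 1024, 3), dtype="float32")

features = backbone(images)              # positional tensor input
features = backbone({"images": images})  # dict input keyed by the new name
# Output shape depends on patch_size and num_output_channels,
# e.g. (1, 64, 64, 256) for 16x16 patches and a 256-channel neck.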
{keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: keras-hub-nightly
-Version: 0.16.1.dev202409230338
+Version: 0.16.1.dev202409250340
 Summary: Industry-strength Natural Language Processing extensions for Keras.
 Home-page: https://github.com/keras-team/keras-hub
 Author: Keras team
{keras_hub_nightly-0.16.1.dev202409230338.dist-info → keras_hub_nightly-0.16.1.dev202409250340.dist-info}/RECORD CHANGED
@@ -1,15 +1,15 @@
 keras_hub/__init__.py,sha256=La-s5SQDd0312puWDSbPJ2XYxFXtg0jsCdUa2LMY-Z8,1440
 keras_hub/api/__init__.py,sha256=8EwhEBO-o-92lvGv6M5zOdkNL9Bd3xfutlfGNJ8QwBE,1109
 keras_hub/api/bounding_box/__init__.py,sha256=LNSVZLB1WJ9hMg0wxt7HTfFFd9uAFviH9x9CnfJYzBA,1682
-keras_hub/api/layers/__init__.py,sha256=
+keras_hub/api/layers/__init__.py,sha256=q-W0iSlSnbanCmfeXEfnzeM2Z_akIf0o0wXGI9eoXK4,2742
 keras_hub/api/metrics/__init__.py,sha256=tgQfooPHzlq6w34RHfro6vO8IUITLTf-jU2IWEBxxUM,966
-keras_hub/api/models/__init__.py,sha256=
+keras_hub/api/models/__init__.py,sha256=FzHm8wzz4zFS9o47h74wZqD543M-YBpJBbg0MxVXrPQ,13597
 keras_hub/api/samplers/__init__.py,sha256=l56H4y3h_HlRn_PpeMyZ6vC7228EH_BVFo4Caay-zQ8,1315
 keras_hub/api/tokenizers/__init__.py,sha256=nzMwKmxkMCOiYB35BIgxHNveCM9WoYRp7ChhmVK8MIM,3042
 keras_hub/api/utils/__init__.py,sha256=4IXDgmXqFzqrCK2MPgkih0Ye1s-8hrlBaUk-n5Kqwl4,800
 keras_hub/src/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5ua9OWCg,585
 keras_hub/src/api_export.py,sha256=82JzmDgnWTJR-PRJI9L_vjhW2Svz8gilbE1NMGZ2JgA,2085
-keras_hub/src/version_utils.py,sha256=
+keras_hub/src/version_utils.py,sha256=DfqQUnCphubGoSlfFEJS5b7zQfHnTOCqwLSi0WgDvfI,808
 keras_hub/src/bounding_box/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5ua9OWCg,585
 keras_hub/src/bounding_box/converters.py,sha256=V2ti6xPpaBgeLKbTpCsHsABdYOYASerIKX9oWqeOjHo,18450
 keras_hub/src/bounding_box/formats.py,sha256=5bbHO-n2ADsKIOBJDHMvIPCeNBaV1_mj-NVCgBKNiu8,4453
@@ -56,6 +56,7 @@ keras_hub/src/models/causal_lm_preprocessor.py,sha256=VvHwIwnQyKzMDKTtW0CuWQ0faR
 keras_hub/src/models/feature_pyramid_backbone.py,sha256=p4z7urzAAz0V6Q9WS57heaxWVLKW-11LoFKnXYxetUA,2832
 keras_hub/src/models/image_classifier.py,sha256=72qxEL01DSKE-Ugg4tpZqkLQpYf15bPfpknBnbx_G8Q,3754
 keras_hub/src/models/image_classifier_preprocessor.py,sha256=Az9596ow470lqCzYF0I-GUkHbVfWx4GiynvpwGws6f0,3199
+keras_hub/src/models/image_segmenter.py,sha256=J8kcZzrVgQxQi21juGsOmCJaNWLYx_S8aGq6qss5TLA,3599
 keras_hub/src/models/masked_lm.py,sha256=x8jeqgYsKsgeVPAirVRPHDdT21FAhqJ45pb8mIPc410,4161
 keras_hub/src/models/masked_lm_preprocessor.py,sha256=Z6mo0szZp5Kfn6LmtY7EjZWGxLdR4c75hfw97V310Kc,6241
 keras_hub/src/models/preprocessor.py,sha256=PZruA4xHS_w0-9hWLD1iJ79aOQMP81aJPYXl5SpjXak,7174
@@ -240,6 +241,13 @@ keras_hub/src/models/roberta/roberta_presets.py,sha256=Ys5WnfBCzrRDLVLrAm412ojHY
 keras_hub/src/models/roberta/roberta_text_classifier.py,sha256=A4psd1Ef0ZSPMCsBpSLe5xmZqsFSn5XZ8gr_ekL9EoU,7268
 keras_hub/src/models/roberta/roberta_text_classifier_preprocessor.py,sha256=xK0dGPi3nZ5mUoRtTSE8OhibQSaOvzkGELhPAJAB5sc,6579
 keras_hub/src/models/roberta/roberta_tokenizer.py,sha256=RlKxa0eo7KYgRH5HSHrflna2LkB9pS6qjm2cr4DbuBg,3299
+keras_hub/src/models/sam/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5ua9OWCg,585
+keras_hub/src/models/sam/sam_backbone.py,sha256=Gv535P33JczvBxP7Z8n4vQCXJnzJAKd5YGdUn6ni3uQ,4937
+keras_hub/src/models/sam/sam_image_segmenter.py,sha256=QW00mR0fVhqiSFVnYORCIGAa9HhDJNLYWi1fA6Kp1uM,8091
+keras_hub/src/models/sam/sam_layers.py,sha256=6A4H2qiJSmpSUjp0xwaGzhQeAAUWi-lsETJjyfHiMV8,14448
+keras_hub/src/models/sam/sam_mask_decoder.py,sha256=Iwq9-YQYVXtFXg7fUnu0BRyUMS6D_56AV2IOCVdlGb8,10135
+keras_hub/src/models/sam/sam_prompt_encoder.py,sha256=rUinjN0yI8h6ewA_n0GWMJAg5FszS0x25ADdKiGAaBA,12387
+keras_hub/src/models/sam/sam_transformer.py,sha256=V3UfDYldxXVW2jngVUS9Klu7HXwKf5ROfpTm3fPEtOo,6316
 keras_hub/src/models/stable_diffusion_v3/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5ua9OWCg,585
 keras_hub/src/models/stable_diffusion_v3/clip_encoder_block.py,sha256=6-bOVTGHCSniDYf616UhKmDHM239y8J5wdjZATXgxig,3556
 keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py,sha256=90QYFvAlSk_F1HC80VG6IceVN0Q8paIHZQpbaG2pMec,3172
@@ -262,7 +270,7 @@ keras_hub/src/models/vgg/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5
 keras_hub/src/models/vgg/vgg_backbone.py,sha256=O6onZEduEPt1J4v2HFgtHsxu-SheqpUwY2pYoeLa6uE,5080
 keras_hub/src/models/vgg/vgg_image_classifier.py,sha256=cDcmHoHU1BZ211JakGPw3Z9lV22oMmK8J4-Ng8S07G0,4071
 keras_hub/src/models/vit_det/__init__.py,sha256=lY7spwqXeGX_75qOHiSCff7FPvFCvRamJMF5ua9OWCg,585
-keras_hub/src/models/vit_det/vit_det_backbone.py,sha256=
+keras_hub/src/models/vit_det/vit_det_backbone.py,sha256=wRtFHqKSQ-d7Bzv4LJ-Uhjn013AsIP86wvHOrXfGzGU,8241
 keras_hub/src/models/vit_det/vit_layers.py,sha256=JeUzOT2jmSOoJ_OiHOfLSkkCUZ5mlK5Mfd21DwudRCQ,20436
 keras_hub/src/models/whisper/__init__.py,sha256=FI-xj6FwZDAAdCfKhOrE1_roQ8cXhD1gK4G6CLTvPQo,849
 keras_hub/src/models/whisper/whisper_audio_converter.py,sha256=JqtA2kLUMFKZ4FrI8g2piEjahE-0-F3Yp4qQXS1cYf4,8973
@@ -328,7 +336,7 @@ keras_hub/src/utils/transformers/convert_mistral.py,sha256=4QStizMS6ESEPjSI-ls6j
 keras_hub/src/utils/transformers/convert_pali_gemma.py,sha256=BT5eX1QzbjCQCopbMstiejQQWQiB_N77bpD5FMUygEo,11234
 keras_hub/src/utils/transformers/preset_loader.py,sha256=9x9hLhDh_6PAHG5gay5rVoEVyt-gXTQGrnprjMLKvCM,3294
 keras_hub/src/utils/transformers/safetensor_utils.py,sha256=2O8lcCf9yIFt5xiRVOtF1ZkPb5pfhOfDJotBaanD9Zo,3547
-keras_hub_nightly-0.16.1.
-keras_hub_nightly-0.16.1.
-keras_hub_nightly-0.16.1.
-keras_hub_nightly-0.16.1.
+keras_hub_nightly-0.16.1.dev202409250340.dist-info/METADATA,sha256=I0qfaK-EcBkBLd3AKkbeGymIVDvZaufV75SvbuwSeOA,7061
+keras_hub_nightly-0.16.1.dev202409250340.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+keras_hub_nightly-0.16.1.dev202409250340.dist-info/top_level.txt,sha256=N4J6piIWBKa38A4uV-CnIopnOEf8mHAbkNXafXm_CuA,10
+keras_hub_nightly-0.16.1.dev202409250340.dist-info/RECORD,,

File without changes