keras-hub-nightly 0.16.1.dev202409240339__py3-none-any.whl → 0.16.1.dev202409260340__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +5 -0
- keras_hub/api/models/__init__.py +19 -0
- keras_hub/api/tokenizers/__init__.py +1 -0
- keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_encoder_block.py +8 -2
- keras_hub/src/models/clip/clip_preprocessor.py +147 -0
- keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_text_encoder.py +60 -57
- keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_tokenizer.py +69 -30
- keras_hub/src/models/densenet/__init__.py +6 -0
- keras_hub/src/models/densenet/densenet_backbone.py +11 -8
- keras_hub/src/models/densenet/densenet_image_classifier.py +27 -4
- keras_hub/src/models/densenet/densenet_image_classifier_preprocessor.py +27 -0
- keras_hub/src/models/densenet/densenet_image_converter.py +23 -0
- keras_hub/src/models/densenet/densenet_presets.py +56 -0
- keras_hub/src/models/image_segmenter.py +86 -0
- keras_hub/src/models/sam/__init__.py +13 -0
- keras_hub/src/models/sam/sam_backbone.py +153 -0
- keras_hub/src/models/sam/sam_image_segmenter.py +237 -0
- keras_hub/src/models/sam/sam_layers.py +402 -0
- keras_hub/src/models/sam/sam_mask_decoder.py +270 -0
- keras_hub/src/models/sam/sam_prompt_encoder.py +336 -0
- keras_hub/src/models/sam/sam_transformer.py +159 -0
- keras_hub/src/models/stable_diffusion_3/__init__.py +13 -0
- keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +93 -0
- keras_hub/src/models/{stable_diffusion_v3 → stable_diffusion_3}/mmdit.py +351 -26
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +630 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +151 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +77 -0
- keras_hub/src/models/{stable_diffusion_v3/t5_xxl_text_encoder.py → stable_diffusion_3/t5_encoder.py} +7 -7
- keras_hub/src/models/stable_diffusion_3/vae_image_decoder.py +333 -0
- keras_hub/src/models/{stable_diffusion_v3/t5_xxl_preprocessor.py → t5/t5_preprocessor.py} +12 -3
- keras_hub/src/models/text_to_image.py +295 -0
- keras_hub/src/models/vit_det/vit_det_backbone.py +17 -12
- keras_hub/src/utils/timm/convert_densenet.py +107 -0
- keras_hub/src/utils/timm/preset_loader.py +3 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/RECORD +40 -24
- keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +0 -93
- keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +0 -317
- keras_hub/src/models/stable_diffusion_v3/vae_attention.py +0 -126
- keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +0 -186
- /keras_hub/src/models/{stable_diffusion_v3 → clip}/__init__.py +0 -0
- {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/top_level.txt +0 -0
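Most of this release is a refactor: the CLIP components graduate from the private `stable_diffusion_v3` package into a standalone `clip` package, `stable_diffusion_v3` itself is replaced by `stable_diffusion_3`, and SAM and DenseNet gain new modules. For code pinned to the old internal paths, the moves imply updates along these lines (a sketch inferred from the file list above; the class names are assumptions, since the diff only shows file paths):

```python
# Old internal import path (removed in this release) -- class name is a
# hypothetical inferred from the module file name:
# from keras_hub.src.models.stable_diffusion_v3.clip_tokenizer import CLIPTokenizer

# New locations, per the file moves listed above:
from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
from keras_hub.src.models.clip.clip_text_encoder import CLIPTextEncoder
from keras_hub.src.models.clip.clip_preprocessor import CLIPPreprocessor
```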
--- /dev/null
+++ keras_hub/src/models/sam/sam_transformer.py
@@ -0,0 +1,159 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import keras
+from keras import ops
+
+from keras_hub.src.models.sam.sam_layers import (
+    MultiHeadAttentionWithDownsampling,
+)
+from keras_hub.src.models.sam.sam_layers import TwoWayMultiHeadAttention
+
+
+class TwoWayTransformer(keras.layers.Layer):
+    """A two-way cross-attention transformer decoder.
+
+    A transformer decoder that attends to an input image using queries whose
+    positional embedding is supplied. The decoder design is described in
+    [1](https://arxiv.org/abs/2304.02643). Each decoder layer performs 4
+    steps: (1) self-attention on the tokens, (2) cross-attention from tokens
+    (as queries) to the image embedding, (3) a point-wise MLP that updates
+    each token, and (4) cross-attention from the image embedding (as queries)
+    to tokens. This last step updates the image embedding with prompt
+    information. Each self/cross-attention and MLP has a residual connection
+    and layer normalization.
+
+    To ensure the decoder has access to critical geometric information, the
+    positional encodings are added to the image embedding whenever it
+    participates in an attention layer. Additionally, the entire original
+    prompt tokens (including their positional encodings) are re-added to the
+    updated tokens whenever they participate in an attention layer. This
+    allows for a strong dependence on both the prompt tokens' geometric
+    location and type.
+
+    Args:
+        num_layers: int, optional. The number of attention blocks to use.
+            Defaults to `2`.
+        hidden_size: int, optional. The number of features of the input image
+            and point embeddings. Defaults to `256`.
+        num_heads: int, optional. Number of heads to use in the attention
+            layers. Defaults to `8`.
+        intermediate_dim: int, optional. The number of units in the hidden
+            layer of the MLP block used in the attention layers. Defaults to
+            `2048`.
+        activation: str, optional. The activation of the MLP block's output
+            layer used in the attention layers. Defaults to `"relu"`.
+        attention_downsample_rate: int, optional. The downsample rate of the
+            attention layers. Defaults to `2`.
+    """
+
+    def __init__(
+        self,
+        *,
+        num_layers=2,
+        hidden_size=256,
+        num_heads=8,
+        intermediate_dim=2048,
+        activation="relu",
+        attention_downsample_rate=2,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
+        self.num_heads = num_heads
+        self.intermediate_dim = intermediate_dim
+        self.activation = activation
+        self.attention_downsample_rate = attention_downsample_rate
+        self.layers = []
+        for i in range(num_layers):
+            self.layers.append(
+                TwoWayMultiHeadAttention(
+                    num_heads=num_heads,
+                    key_dim=hidden_size // num_heads,
+                    intermediate_dim=intermediate_dim,
+                    skip_first_layer_pos_embedding=(i == 0),
+                    attention_downsample_rate=attention_downsample_rate,
+                    activation=activation,
+                    dtype=self.dtype_policy,
+                )
+            )
+        self.final_attention_token_to_image = (
+            MultiHeadAttentionWithDownsampling(
+                num_heads=num_heads,
+                key_dim=hidden_size // num_heads,
+                downsample_rate=attention_downsample_rate,
+                dtype=self.dtype_policy,
+            )
+        )
+        self.final_layer_norm = keras.layers.LayerNormalization(
+            epsilon=1e-5, dtype=self.dtype_policy
+        )
+
+    def build(self, input_shape=None):
+        for layer in self.layers:
+            layer.build()
+        self.final_attention_token_to_image.build()
+        self.final_layer_norm.build([None, None, self.hidden_size])
+        self.built = True
+
+    def call(
+        self, image_embedding, image_positional_embeddings, point_embedding
+    ):
+        shape = ops.shape(image_embedding)
+        B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+        image_embedding = ops.reshape(image_embedding, (B, H * W, C))
+
+        shape = ops.shape(image_positional_embeddings)
+        B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+        image_positional_embeddings = ops.reshape(
+            image_positional_embeddings, (B, H * W, C)
+        )
+        queries = point_embedding
+        keys = image_embedding
+
+        for layer in self.layers:
+            queries, keys = layer(
+                queries=queries,
+                keys=keys,
+                query_pos_embedding=point_embedding,
+                key_pos_embedding=image_positional_embeddings,
+            )
+
+        queries_with_pos_embedding = queries + point_embedding
+        keys_with_pos_embedding = keys + image_positional_embeddings
+        attention_map = self.final_attention_token_to_image(
+            query=queries_with_pos_embedding,
+            key=keys_with_pos_embedding,
+            value=keys,
+        )
+        queries = queries + attention_map
+        queries = self.final_layer_norm(queries)
+
+        return queries, keys
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_layers": self.num_layers,
+                "hidden_size": self.hidden_size,
+                "num_heads": self.num_heads,
+                "intermediate_dim": self.intermediate_dim,
+                "activation": self.activation,
+                "attention_downsample_rate": self.attention_downsample_rate,
+            }
+        )
+        return config
@@ -0,0 +1,13 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- /dev/null
+++ keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py
@@ -0,0 +1,93 @@
+# Copyright 2024 The KerasHub Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from keras import layers
+from keras import ops
+
+
+class FlowMatchEulerDiscreteScheduler(layers.Layer):
+    """Flow-matching sampling Euler scheduler.
+
+    This layer computes the discrete sigmas for the diffusion chain. Here,
+    sigma refers to the amount of noise added during the diffusion process.
+
+    Args:
+        num_train_timesteps: int. The number of diffusion steps used to
+            train the model.
+        shift: float. The shift value for the timestep schedule.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+
+    Call arguments:
+        inputs: The current step of the diffusion process.
+        num_steps: The total number of steps in the diffusion process.
+
+    References:
+    - [Common Diffusion Noise Schedules and Sample Steps are Flawed](
+    https://arxiv.org/abs/2305.08891).
+    - [Scaling Rectified Flow Transformers for High-Resolution Image Synthesis](
+    https://arxiv.org/abs/2403.03206).
+    """
+
+    def __init__(self, num_train_timesteps=1000, shift=1.0, **kwargs):
+        super().__init__(**kwargs)
+        self.num_train_timesteps = int(num_train_timesteps)
+        self.shift = float(shift)
+
+        timesteps = ops.linspace(
+            1, num_train_timesteps, num_train_timesteps, dtype="float32"
+        )
+        timesteps = ops.flip(timesteps, axis=0)
+        sigmas = self._timestep_to_sigma(timesteps)
+
+        self.timesteps = ops.multiply(sigmas, num_train_timesteps)
+        self.sigma_min = sigmas[-1]
+        self.sigma_max = sigmas[0]
+
+    def _sigma_to_timestep(self, sigma):
+        return sigma * self.num_train_timesteps
+
+    def _timestep_to_sigma(self, timestep):
+        sigma = ops.divide(timestep, self.num_train_timesteps)
+        if self.shift != 1.0:
+            sigma = ops.divide(
+                ops.multiply(self.shift, sigma),
+                ops.add(1, ops.multiply(self.shift - 1.0, sigma)),
+            )
+        return sigma
+
+    def call(self, inputs, num_steps):
+        start = self._sigma_to_timestep(self.sigma_max)
+        end = self._sigma_to_timestep(self.sigma_min)
+        step_size = ops.divide(
+            ops.subtract(end, start), ops.subtract(num_steps, 1)
+        )
+        timestep = ops.add(start, ops.multiply(inputs, step_size))
+        sigma = ops.maximum(self._timestep_to_sigma(timestep), 0.0)
+        timestep = self._sigma_to_timestep(sigma)
+        return sigma, timestep
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_train_timesteps": self.num_train_timesteps,
+                "shift": self.shift,
+            }
+        )
+        return config
+
+    def compute_output_shape(self):
+        # Returns a tuple of (sigma, timestep).
+        return (None,), (None,)
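The scheduler maps a step index onto a linearly spaced timestep grid running from `sigma_max * num_train_timesteps` down to `sigma_min * num_train_timesteps`, where the `shift` parameter warps each raw sigma as `sigma -> shift * sigma / (1 + (shift - 1) * sigma)`. A short sketch of how a sampling loop would query it (the step count and shift value are illustrative assumptions, not defaults from the diff):

```python
from keras_hub.src.models.stable_diffusion_3.flow_match_euler_discrete_scheduler import (
    FlowMatchEulerDiscreteScheduler,
)

scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)

num_steps = 28
for step in range(num_steps):
    # `sigma` is the noise level for this step; `timestep` is simply
    # `sigma * num_train_timesteps`, the conditioning value a diffusion
    # model would consume at this step.
    sigma, timestep = scheduler(step, num_steps)
```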