keras-hub-nightly 0.16.1.dev202409240339__py3-none-any.whl → 0.16.1.dev202409260340__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. keras_hub/api/layers/__init__.py +5 -0
  2. keras_hub/api/models/__init__.py +19 -0
  3. keras_hub/api/tokenizers/__init__.py +1 -0
  4. keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_encoder_block.py +8 -2
  5. keras_hub/src/models/clip/clip_preprocessor.py +147 -0
  6. keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_text_encoder.py +60 -57
  7. keras_hub/src/models/{stable_diffusion_v3 → clip}/clip_tokenizer.py +69 -30
  8. keras_hub/src/models/densenet/__init__.py +6 -0
  9. keras_hub/src/models/densenet/densenet_backbone.py +11 -8
  10. keras_hub/src/models/densenet/densenet_image_classifier.py +27 -4
  11. keras_hub/src/models/densenet/densenet_image_classifier_preprocessor.py +27 -0
  12. keras_hub/src/models/densenet/densenet_image_converter.py +23 -0
  13. keras_hub/src/models/densenet/densenet_presets.py +56 -0
  14. keras_hub/src/models/image_segmenter.py +86 -0
  15. keras_hub/src/models/sam/__init__.py +13 -0
  16. keras_hub/src/models/sam/sam_backbone.py +153 -0
  17. keras_hub/src/models/sam/sam_image_segmenter.py +237 -0
  18. keras_hub/src/models/sam/sam_layers.py +402 -0
  19. keras_hub/src/models/sam/sam_mask_decoder.py +270 -0
  20. keras_hub/src/models/sam/sam_prompt_encoder.py +336 -0
  21. keras_hub/src/models/sam/sam_transformer.py +159 -0
  22. keras_hub/src/models/stable_diffusion_3/__init__.py +13 -0
  23. keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +93 -0
  24. keras_hub/src/models/{stable_diffusion_v3 → stable_diffusion_3}/mmdit.py +351 -26
  25. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +630 -0
  26. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +151 -0
  27. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +77 -0
  28. keras_hub/src/models/{stable_diffusion_v3/t5_xxl_text_encoder.py → stable_diffusion_3/t5_encoder.py} +7 -7
  29. keras_hub/src/models/stable_diffusion_3/vae_image_decoder.py +333 -0
  30. keras_hub/src/models/{stable_diffusion_v3/t5_xxl_preprocessor.py → t5/t5_preprocessor.py} +12 -3
  31. keras_hub/src/models/text_to_image.py +295 -0
  32. keras_hub/src/models/vit_det/vit_det_backbone.py +17 -12
  33. keras_hub/src/utils/timm/convert_densenet.py +107 -0
  34. keras_hub/src/utils/timm/preset_loader.py +3 -0
  35. keras_hub/src/version_utils.py +1 -1
  36. {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/METADATA +1 -1
  37. {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/RECORD +40 -24
  38. keras_hub/src/models/stable_diffusion_v3/clip_preprocessor.py +0 -93
  39. keras_hub/src/models/stable_diffusion_v3/mmdit_block.py +0 -317
  40. keras_hub/src/models/stable_diffusion_v3/vae_attention.py +0 -126
  41. keras_hub/src/models/stable_diffusion_v3/vae_image_decoder.py +0 -186
  42. /keras_hub/src/models/{stable_diffusion_v3 → clip}/__init__.py +0 -0
  43. {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/WHEEL +0 -0
  44. {keras_hub_nightly-0.16.1.dev202409240339.dist-info → keras_hub_nightly-0.16.1.dev202409260340.dist-info}/top_level.txt +0 -0
keras_hub/src/models/sam/sam_transformer.py
@@ -0,0 +1,159 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import keras
+ from keras import ops
+
+ from keras_hub.src.models.sam.sam_layers import (
+     MultiHeadAttentionWithDownsampling,
+ )
+ from keras_hub.src.models.sam.sam_layers import TwoWayMultiHeadAttention
+
+
+ class TwoWayTransformer(keras.layers.Layer):
+     """A two-way cross-attention transformer decoder.
+
+     A transformer decoder that attends to an input image using
+     queries whose positional embedding is supplied.
+     The transformer decoder design is shown in
+     [1](https://arxiv.org/abs/2304.02643).
+     Each decoder layer performs 4 steps:
+     (1) self-attention on the tokens,
+     (2) cross-attention from tokens (as queries) to the image embedding,
+     (3) a point-wise MLP updates each token, and
+     (4) cross-attention from the image embedding (as queries) to tokens.
+     This last step updates the image embedding with prompt information.
+     Each self/cross-attention and MLP has a residual connection and
+     layer normalization.
+
+     To ensure the decoder has access to critical geometric information,
+     the positional encodings are added to the image embedding whenever
+     they participate in an attention layer. Additionally, the entire
+     original prompt tokens (including their positional encodings) are
+     re-added to the updated tokens whenever they participate in an
+     attention layer. This allows for a strong dependence on both the
+     prompt tokens' geometric location and type.
+
+     Args:
+         num_layers: int, optional. The number of attention blocks to
+             use. Defaults to `2`.
+         hidden_size: int, optional. The number of features of the input
+             image and point embeddings. Defaults to `256`.
+         num_heads: int, optional. Number of heads to use in the
+             attention layers. Defaults to `8`.
+         intermediate_dim: int, optional. The number of units in the
+             hidden layer of the MLP block used in the attention layers.
+             Defaults to `2048`.
+         activation: str, optional. The activation of the MLP block's
+             output layer used in the attention layers. Defaults to
+             `"relu"`.
+         attention_downsample_rate: int, optional. The downsample rate of
+             the attention layers. Defaults to `2`.
+     """
+
+     def __init__(
+         self,
+         *,
+         num_layers=2,
+         hidden_size=256,
+         num_heads=8,
+         intermediate_dim=2048,
+         activation="relu",
+         attention_downsample_rate=2,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.num_layers = num_layers
+         self.hidden_size = hidden_size
+         self.num_heads = num_heads
+         self.intermediate_dim = intermediate_dim
+         self.activation = activation
+         self.attention_downsample_rate = attention_downsample_rate
+         self.layers = []
+         for i in range(num_layers):
+             self.layers.append(
+                 TwoWayMultiHeadAttention(
+                     num_heads=num_heads,
+                     key_dim=hidden_size // num_heads,
+                     intermediate_dim=intermediate_dim,
+                     skip_first_layer_pos_embedding=(i == 0),
+                     attention_downsample_rate=attention_downsample_rate,
+                     activation=activation,
+                     dtype=self.dtype_policy,
+                 )
+             )
+         self.final_attention_token_to_image = (
+             MultiHeadAttentionWithDownsampling(
+                 num_heads=num_heads,
+                 key_dim=hidden_size // num_heads,
+                 downsample_rate=attention_downsample_rate,
+                 dtype=self.dtype_policy,
+             )
+         )
+         self.final_layer_norm = keras.layers.LayerNormalization(
+             epsilon=1e-5, dtype=self.dtype_policy
+         )
+
+     def build(self, input_shape=None):
+         for layer in self.layers:
+             layer.build()
+         self.final_attention_token_to_image.build()
+         self.final_layer_norm.build([None, None, self.hidden_size])
+         self.built = True
+
+     def call(
+         self, image_embedding, image_positional_embeddings, point_embedding
+     ):
+         shape = ops.shape(image_embedding)
+         B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+         image_embedding = ops.reshape(image_embedding, (B, H * W, C))
+
+         shape = ops.shape(image_positional_embeddings)
+         B, H, W, C = shape[0], shape[1], shape[2], shape[3]
+         image_positional_embeddings = ops.reshape(
+             image_positional_embeddings, (B, H * W, C)
+         )
+         queries = point_embedding
+         keys = image_embedding
+
+         for layer in self.layers:
+             queries, keys = layer(
+                 queries=queries,
+                 keys=keys,
+                 query_pos_embedding=point_embedding,
+                 key_pos_embedding=image_positional_embeddings,
+             )
+
+         queries_with_pos_embedding = queries + point_embedding
+         keys_with_pos_embedding = keys + image_positional_embeddings
+         attention_map = self.final_attention_token_to_image(
+             query=queries_with_pos_embedding,
+             key=keys_with_pos_embedding,
+             value=keys,
+         )
+         queries = queries + attention_map
+         queries = self.final_layer_norm(queries)
+
+         return queries, keys
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "num_layers": self.num_layers,
+                 "hidden_size": self.hidden_size,
+                 "num_heads": self.num_heads,
+                 "intermediate_dim": self.intermediate_dim,
+                 "activation": self.activation,
+                 "attention_downsample_rate": self.attention_downsample_rate,
+             }
+         )
+         return config
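
For reference, a minimal usage sketch of the new `TwoWayTransformer` layer, assuming only the constructor defaults and the call signature shown in the hunk above; the batch size, grid size, and number of prompt tokens are illustrative, not values prescribed by the package:

import numpy as np

from keras_hub.src.models.sam.sam_transformer import TwoWayTransformer

# Illustrative shapes: batch of 1, an 8x8 grid of image embeddings with
# hidden_size=256 features, and 3 prompt (point) tokens.
image_embedding = np.random.rand(1, 8, 8, 256).astype("float32")
image_positional_embeddings = np.random.rand(1, 8, 8, 256).astype("float32")
point_embedding = np.random.rand(1, 3, 256).astype("float32")

transformer = TwoWayTransformer()  # num_layers=2, hidden_size=256, num_heads=8
queries, keys = transformer(
    image_embedding, image_positional_embeddings, point_embedding
)
# queries -> (1, 3, 256): prompt tokens updated by attending to the image.
# keys -> (1, 64, 256): flattened image embedding updated by the prompts.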
keras_hub/src/models/stable_diffusion_3/__init__.py
@@ -0,0 +1,13 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py
@@ -0,0 +1,93 @@
+ # Copyright 2024 The KerasHub Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from keras import layers
+ from keras import ops
+
+
+ class FlowMatchEulerDiscreteScheduler(layers.Layer):
+     """Flow-matching Euler discrete sampling scheduler.
+
+     This layer is used to compute the discrete sigmas for the diffusion
+     chain. Typically, the sigma refers to the amount of noise added
+     during the diffusion process.
+
+     Args:
+         num_train_timesteps: int. The number of diffusion steps used to
+             train the model.
+         shift: float. The shift value for the timestep schedule.
+         **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+             including `name`, `dtype` etc.
+
+     Call arguments:
+         inputs: The current step of the diffusion process.
+         num_steps: The total number of steps in the diffusion process.
+
+     References:
+         - [Common Diffusion Noise Schedules and Sample Steps are Flawed](
+         https://arxiv.org/abs/2305.08891).
+         - [Scaling Rectified Flow Transformers for High-Resolution Image
+         Synthesis](https://arxiv.org/abs/2403.03206).
+     """
+
+     def __init__(self, num_train_timesteps=1000, shift=1.0, **kwargs):
+         super().__init__(**kwargs)
+         self.num_train_timesteps = int(num_train_timesteps)
+         self.shift = float(shift)
+
+         timesteps = ops.linspace(
+             1, num_train_timesteps, num_train_timesteps, dtype="float32"
+         )
+         timesteps = ops.flip(timesteps, axis=0)
+         sigmas = self._timestep_to_sigma(timesteps)
+
+         self.timesteps = ops.multiply(sigmas, num_train_timesteps)
+         self.sigma_min = sigmas[-1]
+         self.sigma_max = sigmas[0]
+
+     def _sigma_to_timestep(self, sigma):
+         return sigma * self.num_train_timesteps
+
+     def _timestep_to_sigma(self, timestep):
+         sigma = ops.divide(timestep, self.num_train_timesteps)
+         if self.shift != 1.0:
+             sigma = ops.divide(
+                 ops.multiply(self.shift, sigma),
+                 ops.add(1, ops.multiply(self.shift - 1.0, sigma)),
+             )
+         return sigma
+
+     def call(self, inputs, num_steps):
+         start = self._sigma_to_timestep(self.sigma_max)
+         end = self._sigma_to_timestep(self.sigma_min)
+         step_size = ops.divide(
+             ops.subtract(end, start), ops.subtract(num_steps, 1)
+         )
+         timestep = ops.add(start, ops.multiply(inputs, step_size))
+         sigma = ops.maximum(self._timestep_to_sigma(timestep), 0.0)
+         timestep = self._sigma_to_timestep(sigma)
+         return sigma, timestep
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "num_train_timesteps": self.num_train_timesteps,
+                 "shift": self.shift,
+             }
+         )
+         return config
+
+     def compute_output_shape(self):
+         # Returns a tuple of (sigma, timestep).
+         return (None,), (None,)
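
A short sketch of how this scheduler might be stepped during sampling, assuming only the `call(inputs, num_steps)` signature shown above; the `shift` value and the step count are illustrative, not defaults prescribed by the package:

from keras_hub.src.models.stable_diffusion_3.flow_match_euler_discrete_scheduler import (
    FlowMatchEulerDiscreteScheduler,
)

scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)

num_steps = 28  # illustrative number of sampling steps
for step in range(num_steps):
    sigma, timestep = scheduler(step, num_steps)
    # sigma decays from sigma_max toward sigma_min as `step` grows.
    # With shift != 1.0, sigma = shift * s / (1 + (shift - 1) * s),
    # where s = timestep / num_train_timesteps.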