keras-hub-nightly 0.22.0.dev202507150421__py3-none-any.whl → 0.22.0.dev202507170424__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. keras_hub/layers/__init__.py +3 -0
  2. keras_hub/models/__init__.py +3 -0
  3. keras_hub/src/models/clip/clip_backbone.py +3 -102
  4. keras_hub/src/models/clip/clip_layers.py +295 -0
  5. keras_hub/src/models/clip/clip_preprocessor.py +57 -48
  6. keras_hub/src/models/clip/clip_text_encoder.py +2 -2
  7. keras_hub/src/models/clip/clip_vision_encoder.py +3 -3
  8. keras_hub/src/models/dinov2/__init__.py +5 -0
  9. keras_hub/src/models/dinov2/dinov2_backbone.py +228 -0
  10. keras_hub/src/models/dinov2/dinov2_image_converter.py +8 -0
  11. keras_hub/src/models/dinov2/dinov2_layers.py +886 -0
  12. keras_hub/src/models/dinov2/dinov2_presets.py +4 -0
  13. keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +6 -2
  14. keras_hub/src/models/hgnetv2/__init__.py +5 -0
  15. keras_hub/src/models/hgnetv2/hgnetv2_presets.py +5 -5
  16. keras_hub/src/models/stable_diffusion_3/flow_match_euler_discrete_scheduler.py +16 -7
  17. keras_hub/src/models/stable_diffusion_3/mmdit.py +61 -4
  18. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +23 -32
  19. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +1 -0
  20. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +1 -0
  21. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +1 -0
  22. keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py +6 -2
  23. keras_hub/src/utils/preset_utils.py +4 -1
  24. keras_hub/src/utils/transformers/convert_dinov2.py +180 -0
  25. keras_hub/src/utils/transformers/export/gemma.py +89 -0
  26. keras_hub/src/utils/transformers/export/hf_exporter.py +98 -0
  27. keras_hub/src/utils/transformers/preset_loader.py +4 -1
  28. keras_hub/src/version.py +1 -1
  29. {keras_hub_nightly-0.22.0.dev202507150421.dist-info → keras_hub_nightly-0.22.0.dev202507170424.dist-info}/METADATA +1 -1
  30. {keras_hub_nightly-0.22.0.dev202507150421.dist-info → keras_hub_nightly-0.22.0.dev202507170424.dist-info}/RECORD +32 -25
  31. keras_hub/src/models/clip/clip_encoder_block.py +0 -111
  32. keras_hub/src/models/clip/clip_vision_embedding.py +0 -101
  33. {keras_hub_nightly-0.22.0.dev202507150421.dist-info → keras_hub_nightly-0.22.0.dev202507170424.dist-info}/WHEEL +0 -0
  34. {keras_hub_nightly-0.22.0.dev202507150421.dist-info → keras_hub_nightly-0.22.0.dev202507170424.dist-info}/top_level.txt +0 -0
keras_hub/src/models/dinov2/dinov2_backbone.py
@@ -0,0 +1,228 @@
+ from keras import layers
+
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.backbone import Backbone
+ from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Embedding
+ from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Encoder
+ from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+ @keras_hub_export("keras_hub.models.DINOV2Backbone")
+ class DINOV2Backbone(Backbone):
+     """DINOV2 core network with hyperparameters.
+
+     DINOV2 offers a powerful, generalist visual backbone learned entirely from
+     unlabeled images, as described in [DINOv2: Learning Robust Visual Features
+     without Supervision](https://arxiv.org/abs/2304.07193).
+
+     The default constructor gives a fully customizable, randomly initialized
+     DINOV2 model with any number of layers, heads, and embedding dimensions. To
+     load preset architectures and weights, use the `from_preset` constructor.
+
+     Note that this backbone supports interpolation of the position embeddings
+     to the input image shape. This is useful when the input image shape is
+     different from the shape used to train the position embeddings. The
+     `position_embedding_shape` argument is used to specify the original shape
+     used to train the position embeddings.
+
+     Args:
+         patch_size: int. The size of each square patch in the input image.
+         num_layers: int. The number of transformer layers.
+         hidden_dim: int. The size of the transformer hidden state at the end
+             of each transformer layer.
+         num_heads: int. The number of attention heads for each transformer.
+         intermediate_dim: int. The output dimension of the first Dense layer in
+             a two-layer feedforward network for each transformer.
+         layer_scale_init_value: float. The initial value for the layer scale in
+             the transformer layers. Defaults to `1.0`.
+         num_register_tokens: int. The number of register tokens to use in the
+             embedding layer. Defaults to `0`.
+         use_mask_token: bool. Whether to use a mask token in the embedding
+             layer. Defaults to `True`.
+         use_swiglu_ffn: bool. Whether to use SwiGLU activation in the MLP
+             layers. Defaults to `False`.
+         dropout_rate: float. The dropout rate to use. Defaults to `0.0`.
+         drop_path_rate: float. The drop path rate to use. Defaults to `0.0`.
+         image_shape: tuple. The input shape without the batch size. Defaults to
+             `(224, 224, 3)`.
+         position_embedding_shape: tuple. The original shape used to train the
+             position embeddings. This is used to interpolate the position
+             embeddings to the actual input shape. Defaults to `(518, 518, 3)`.
+         antialias_in_interpolation: bool. Whether to use antialiasing in the
+             interpolation of the position embeddings. Defaults to `False`.
+         data_format: `None` or str. If specified, either `"channels_last"` or
+             `"channels_first"`. The ordering of the dimensions in the
+             inputs. `"channels_last"` corresponds to inputs with shape
+             `(batch_size, height, width, channels)`
+             while `"channels_first"` corresponds to inputs with shape
+             `(batch_size, channels, height, width)`. It defaults to the
+             `image_data_format` value found in your Keras config file at
+             `~/.keras/keras.json`. If you never set it, then it will be
+             `"channels_last"`.
+         dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
+             for the model's computations and weights. Note that some
+             computations, such as softmax and layer normalization, will always
+             be done in float32 precision regardless of dtype.
+
+     Example:
+     ```python
+     # Pretrained DINOV2 model.
+     input_data = {
+         "images": np.ones(shape=(1, 518, 518, 3), dtype="float32"),
+     }
+     model = keras_hub.models.DINOV2Backbone.from_preset(
+         "dinov2_base"
+     )
+     model(input_data)
+
+     # Pretrained DINOV2 model with custom image shape.
+     input_data = {
+         "images": np.ones(shape=(1, 224, 224, 3), dtype="float32"),
+     }
+     model = keras_hub.models.DINOV2Backbone.from_preset(
+         "dinov2_base", image_shape=(224, 224, 3)
+     )
+     model(input_data)
+
+     # Randomly initialized DINOV2 model with custom config.
+     model = keras_hub.models.DINOV2Backbone(
+         patch_size=14,
+         num_layers=2,
+         hidden_dim=32,
+         num_heads=2,
+         intermediate_dim=128,
+         image_shape=(224, 224, 3),
+         position_embedding_shape=(518, 518),
+     )
+     model(input_data)
+     ```
+     """
+
+     def __init__(
+         self,
+         patch_size,
+         num_layers,
+         hidden_dim,
+         num_heads,
+         intermediate_dim,
+         layer_scale_init_value=1.0,
+         num_register_tokens=0,
+         use_mask_token=True,
+         use_swiglu_ffn=False,
+         dropout_rate=0.0,
+         drop_path_rate=0.0,
+         image_shape=(224, 224, 3),
+         position_embedding_shape=(518, 518, 3),
+         antialias_in_interpolation=False,
+         data_format=None,
+         dtype=None,
+         name=None,
+         **kwargs,
+     ):
+         data_format = standardize_data_format(data_format)
+         if data_format == "channels_last":
+             height, width = image_shape[0], image_shape[1]
+             position_embedding_height, position_embedding_width = (
+                 position_embedding_shape[0],
+                 position_embedding_shape[1],
+             )
+         else:
+             height, width = image_shape[1], image_shape[2]
+             position_embedding_height, position_embedding_width = (
+                 position_embedding_shape[1],
+                 position_embedding_shape[2],
+             )
+         if height != width:
+             raise ValueError(
+                 "`DINOV2Backbone` expects the height and width to be the "
+                 f"same in `image_shape`. Received: image_shape={image_shape}"
+             )
+
+         # `prefix` is used to prevent duplicate names when utilizing
+         # multiple DINOV2Backbone encoders within a single model.
+         prefix = str(name) + "_" if name is not None else ""
+
+         # === Layers ===
+         self.embeddings = DINOV2Embedding(
+             hidden_dim=hidden_dim,
+             patch_size=patch_size,
+             image_shape=(height, width),
+             num_register_tokens=num_register_tokens,
+             use_mask_token=use_mask_token,
+             dropout_rate=dropout_rate,
+             position_embedding_shape=(
+                 position_embedding_height,
+                 position_embedding_width,
+             ),
+             antialias_in_interpolation=antialias_in_interpolation,
+             data_format=data_format,
+             dtype=dtype,
+             name=f"{prefix}embeddings",
+         )
+         self.encoder = DINOV2Encoder(
+             num_layers=num_layers,
+             hidden_dim=hidden_dim,
+             num_heads=num_heads,
+             intermediate_dim=intermediate_dim,
+             layer_scale_init_value=layer_scale_init_value,
+             use_swiglu_ffn=use_swiglu_ffn,
+             dropout_rate=dropout_rate,
+             drop_path_rate=drop_path_rate,
+             dtype=dtype,
+             name=f"{prefix}encoder",
+         )
+         self.layernorm = layers.LayerNormalization(
+             epsilon=1e-6, dtype=dtype, name=f"{prefix}layernorm"
+         )
+
+         # === Functional Model ===
+         image_input = layers.Input(shape=image_shape, name="images")
+         x = self.embeddings(image_input)
+         x = self.encoder(x)
+         x = self.layernorm(x)
+         outputs = x
+         super().__init__(
+             inputs={"images": image_input},
+             outputs=outputs,
+             dtype=dtype,
+             name=name,
+             **kwargs,
+         )
+
+         # === Config ===
+         self.patch_size = int(patch_size)
+         self.num_layers = int(num_layers)
+         self.hidden_dim = int(hidden_dim)
+         self.num_heads = int(num_heads)
+         self.intermediate_dim = int(intermediate_dim)
+         self.layer_scale_init_value = float(layer_scale_init_value)
+         self.num_register_tokens = int(num_register_tokens)
+         self.use_mask_token = bool(use_mask_token)
+         self.use_swiglu_ffn = bool(use_swiglu_ffn)
+         self.dropout_rate = float(dropout_rate)
+         self.drop_path_rate = float(drop_path_rate)
+         self.image_shape = image_shape
+         self.position_embedding_shape = position_embedding_shape
+         self.antialias_in_interpolation = bool(antialias_in_interpolation)
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "patch_size": self.patch_size,
+                 "num_layers": self.num_layers,
+                 "hidden_dim": self.hidden_dim,
+                 "num_heads": self.num_heads,
+                 "intermediate_dim": self.intermediate_dim,
+                 "layer_scale_init_value": self.layer_scale_init_value,
+                 "num_register_tokens": self.num_register_tokens,
+                 "use_mask_token": self.use_mask_token,
+                 "use_swiglu_ffn": self.use_swiglu_ffn,
+                 "dropout_rate": self.dropout_rate,
+                 "drop_path_rate": self.drop_path_rate,
+                 "image_shape": self.image_shape,
+                 "position_embedding_shape": self.position_embedding_shape,
+                 "antialias_in_interpolation": self.antialias_in_interpolation,
+             }
+         )
+         return config
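The hunk above is the whole new backbone: embeddings, encoder, and a final layer norm wired into a functional model, so calling it returns the full token sequence rather than a pooled feature. As a minimal, hypothetical sketch of turning that sequence into a global image descriptor, the code below assumes the conventional DINOv2 token layout (class token first, then any register tokens, then patch tokens), which the diff itself does not spell out, and reuses the `dinov2_base` preset name from the docstring:

```python
import numpy as np

import keras_hub

# Assumption: the output has shape
# (batch, 1 + num_register_tokens + num_patches, hidden_dim) with the class
# token at index 0; this is the usual DINOv2 layout and is not confirmed by
# this diff.
backbone = keras_hub.models.DINOV2Backbone.from_preset("dinov2_base")
tokens = backbone({"images": np.ones((1, 518, 518, 3), dtype="float32")})

cls_embedding = tokens[:, 0, :]  # global image descriptor (class token)
patch_tokens = tokens[:, 1:, :]  # register tokens followed by patch tokens
```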
keras_hub/src/models/dinov2/dinov2_image_converter.py
@@ -0,0 +1,8 @@
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+ from keras_hub.src.models.dinov2.dinov2_backbone import DINOV2Backbone
+
+
+ @keras_hub_export("keras_hub.layers.DINOV2ImageConverter")
+ class DINOV2ImageConverter(ImageConverter):
+     backbone_cls = DINOV2Backbone
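The converter itself only registers `backbone_cls`; the actual resizing and rescaling comes from the `ImageConverter` base class plus whatever image-converter config a preset ships. A hedged usage sketch, again assuming the `dinov2_base` preset provides that config:

```python
import numpy as np

import keras_hub

# Assumption: the "dinov2_base" preset ships an image converter config; the
# ImageConverter base class then resizes/rescales to the model's input spec.
converter = keras_hub.layers.DINOV2ImageConverter.from_preset("dinov2_base")
raw = np.random.uniform(0.0, 255.0, size=(1, 600, 600, 3)).astype("float32")
model_inputs = converter(raw)  # preprocessed images, ready for the backbone
```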