keras-hub-nightly 0.23.0.dev202508260411__py3-none-any.whl → 0.23.0.dev202508280418__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package from one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (40)
  1. keras_hub/layers/__init__.py +6 -0
  2. keras_hub/models/__init__.py +21 -0
  3. keras_hub/src/layers/modeling/position_embedding.py +21 -6
  4. keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
  5. keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
  6. keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
  7. keras_hub/src/models/backbone.py +10 -15
  8. keras_hub/src/models/d_fine/__init__.py +0 -0
  9. keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
  10. keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
  11. keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
  12. keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
  13. keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
  14. keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
  15. keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
  16. keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
  17. keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
  18. keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
  19. keras_hub/src/models/d_fine/d_fine_presets.py +2 -0
  20. keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
  21. keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
  22. keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
  23. keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
  24. keras_hub/src/models/parseq/__init__.py +0 -0
  25. keras_hub/src/models/parseq/parseq_backbone.py +134 -0
  26. keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
  27. keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
  28. keras_hub/src/models/parseq/parseq_decoder.py +418 -0
  29. keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
  30. keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
  31. keras_hub/src/tests/test_case.py +37 -1
  32. keras_hub/src/utils/preset_utils.py +49 -0
  33. keras_hub/src/utils/tensor_utils.py +23 -1
  34. keras_hub/src/utils/transformers/convert_vit.py +4 -1
  35. keras_hub/src/version.py +1 -1
  36. keras_hub/tokenizers/__init__.py +3 -0
  37. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/METADATA +1 -1
  38. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/RECORD +40 -20
  39. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/WHEEL +0 -0
  40. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/top_level.txt +0 -0
keras_hub/src/models/d_fine/d_fine_encoder.py (new file)
@@ -0,0 +1,365 @@
+ import keras
+ import numpy as np
+
+ from keras_hub.src.models.d_fine.d_fine_attention import DFineMultiheadAttention
+ from keras_hub.src.utils.keras_utils import clone_initializer
+
+
+ class DFineEncoderLayer(keras.layers.Layer):
+     """Single encoder layer for D-FINE models.
+
+     This layer is the fundamental building block of the `DFineEncoder`. It
+     implements a standard transformer encoder layer with multi-head
+     self-attention (`DFineMultiheadAttention`) and a feed-forward network. It is
+     used to process and refine the feature sequences from the CNN backbone.
+
+     Args:
+         normalize_before: bool, Whether to apply layer normalization before
+             the attention and feed-forward sub-layers (pre-norm) or after
+             (post-norm).
+         encoder_hidden_dim: int, Hidden dimension size of the encoder.
+         num_attention_heads: int, Number of attention heads in multi-head
+             attention.
+         dropout: float, Dropout probability applied to attention outputs and
+             feed-forward outputs.
+         layer_norm_eps: float, Small constant added to the denominator for
+             numerical stability in layer normalization.
+         encoder_activation_function: str, Activation function used in the
+             feed-forward network.
+         activation_dropout: float, Dropout probability applied after the
+             activation function in the feed-forward network.
+         encoder_ffn_dim: int, Hidden dimension size of the feed-forward network.
+         **kwargs: Additional keyword arguments passed to the parent class.
+         kernel_initializer: str or Initializer, optional, Initializer for
+             the kernel weights. Defaults to `"glorot_uniform"`.
+         bias_initializer: str or Initializer, optional, Initializer for
+             the bias weights. Defaults to `"zeros"`.
+     """
+
+     def __init__(
+         self,
+         normalize_before,
+         encoder_hidden_dim,
+         num_attention_heads,
+         dropout,
+         layer_norm_eps,
+         encoder_activation_function,
+         activation_dropout,
+         encoder_ffn_dim,
+         kernel_initializer="glorot_uniform",
+         bias_initializer="zeros",
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.normalize_before = normalize_before
+         self.encoder_hidden_dim = encoder_hidden_dim
+         self.num_attention_heads = num_attention_heads
+         self.dropout_rate = dropout
+         self.layer_norm_eps = layer_norm_eps
+         self.encoder_activation_function = encoder_activation_function
+         self.activation_dropout_rate = activation_dropout
+         self.encoder_ffn_dim = encoder_ffn_dim
+         self.kernel_initializer = keras.initializers.get(kernel_initializer)
+         self.bias_initializer = keras.initializers.get(bias_initializer)
+         self.self_attn = DFineMultiheadAttention(
+             embedding_dim=self.encoder_hidden_dim,
+             num_heads=self.num_attention_heads,
+             dropout=self.dropout_rate,
+             dtype=self.dtype_policy,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             bias_initializer=clone_initializer(self.bias_initializer),
+             name="self_attn",
+         )
+         self.self_attn_layer_norm = keras.layers.LayerNormalization(
+             epsilon=self.layer_norm_eps,
+             name="self_attn_layer_norm",
+             dtype=self.dtype_policy,
+         )
+         self.dropout_layer = keras.layers.Dropout(
+             rate=self.dropout_rate,
+             name="dropout_layer",
+             dtype=self.dtype_policy,
+         )
+         self.activation_fn_layer = keras.layers.Activation(
+             self.encoder_activation_function,
+             name="activation_fn_layer",
+             dtype=self.dtype_policy,
+         )
+         self.activation_dropout_layer = keras.layers.Dropout(
+             rate=self.activation_dropout_rate,
+             name="activation_dropout_layer",
+             dtype=self.dtype_policy,
+         )
+         self.fc1 = keras.layers.Dense(
+             self.encoder_ffn_dim,
+             name="fc1",
+             dtype=self.dtype_policy,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             bias_initializer=clone_initializer(self.bias_initializer),
+         )
+         self.fc2 = keras.layers.Dense(
+             self.encoder_hidden_dim,
+             name="fc2",
+             dtype=self.dtype_policy,
+             kernel_initializer=clone_initializer(self.kernel_initializer),
+             bias_initializer=clone_initializer(self.bias_initializer),
+         )
+         self.final_layer_norm = keras.layers.LayerNormalization(
+             epsilon=self.layer_norm_eps,
+             name="final_layer_norm",
+             dtype=self.dtype_policy,
+         )
+
+     def build(self, input_shape):
+         self.self_attn.build(input_shape)
+         self.self_attn_layer_norm.build(input_shape)
+         self.fc1.build(input_shape)
+         self.fc2.build((input_shape[0], input_shape[1], self.encoder_ffn_dim))
+         self.final_layer_norm.build(input_shape)
+         super().build(input_shape)
+
+     def call(
+         self,
+         hidden_states,
+         attention_mask=None,
+         position_embeddings=None,
+         output_attentions=False,
+         training=None,
+     ):
+         residual = hidden_states
+         if self.normalize_before:
+             hidden_states = self.self_attn_layer_norm(
+                 hidden_states, training=training
+             )
+         hidden_states, attn_weights = self.self_attn(
+             hidden_states=hidden_states,
+             attention_mask=attention_mask,
+             position_embeddings=position_embeddings,
+             output_attentions=output_attentions,
+             training=training,
+         )
+         hidden_states = self.dropout_layer(hidden_states, training=training)
+         hidden_states = residual + hidden_states
+         if not self.normalize_before:
+             hidden_states = self.self_attn_layer_norm(
+                 hidden_states, training=training
+             )
+         if self.normalize_before:
+             hidden_states = self.final_layer_norm(
+                 hidden_states, training=training
+             )
+         residual_ffn = hidden_states
+         hidden_states = self.fc1(hidden_states)
+         hidden_states = self.activation_fn_layer(hidden_states)
+         hidden_states = self.activation_dropout_layer(
+             hidden_states, training=training
+         )
+         hidden_states = self.fc2(hidden_states)
+         hidden_states = self.dropout_layer(hidden_states, training=training)
+         hidden_states = residual_ffn + hidden_states
+         if not self.normalize_before:
+             hidden_states = self.final_layer_norm(
+                 hidden_states, training=training
+             )
+         if training:
+             dtype_name = keras.backend.standardize_dtype(self.compute_dtype)
+             if dtype_name == "float16":
+                 clamp_value = np.finfo(np.float16).max - 1000.0
+             else:  # float32, bfloat16
+                 clamp_value = np.finfo(np.float32).max - 1000.0
+             hidden_states = keras.ops.clip(
+                 hidden_states, x_min=-clamp_value, x_max=clamp_value
+             )
+         if output_attentions:
+             return hidden_states, attn_weights
+         return hidden_states
+
+     def compute_output_spec(
+         self,
+         hidden_states,
+         attention_mask=None,
+         position_embeddings=None,
+         output_attentions=False,
+         training=None,
+     ):
+         attn_output_spec = self.self_attn.compute_output_spec(
+             hidden_states,
+             position_embeddings,
+             attention_mask,
+             output_attentions,
+         )
+         if output_attentions:
+             hidden_states_output_spec, self_attn_weights_spec = attn_output_spec
+             return hidden_states_output_spec, self_attn_weights_spec
+         return attn_output_spec
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "normalize_before": self.normalize_before,
+                 "encoder_hidden_dim": self.encoder_hidden_dim,
+                 "num_attention_heads": self.num_attention_heads,
+                 "dropout": self.dropout_rate,
+                 "layer_norm_eps": self.layer_norm_eps,
+                 "encoder_activation_function": self.encoder_activation_function,
+                 "activation_dropout": self.activation_dropout_rate,
+                 "encoder_ffn_dim": self.encoder_ffn_dim,
+                 "kernel_initializer": keras.initializers.serialize(
+                     self.kernel_initializer
+                 ),
+                 "bias_initializer": keras.initializers.serialize(
+                     self.bias_initializer
+                 ),
+             }
+         )
+         return config
+
+
+ class DFineEncoder(keras.layers.Layer):
+     """Multi-layer encoder for D-FINE models.
+
+     This layer implements a stack of `DFineEncoderLayer` instances. It is used
+     within the `DFineHybridEncoder` to apply transformer-based processing to
+     the feature maps from the CNN backbone, creating rich contextual
+     representations before they are passed to the FPN/PAN pathways.
+
+     Args:
+         normalize_before: bool, Whether to apply layer normalization before
+             the attention and feed-forward sub-layers (pre-norm) or after
+             (post-norm) in each encoder layer.
+         encoder_hidden_dim: int, Hidden dimension size of the encoder layers.
+         num_attention_heads: int, Number of attention heads in multi-head
+             attention for each layer.
+         dropout: float, Dropout probability applied to attention outputs and
+             feed-forward outputs in each layer.
+         layer_norm_eps: float, Small constant added to the denominator for
+             numerical stability in layer normalization.
+         encoder_activation_function: str, Activation function used in the
+             feed-forward networks of each layer.
+         activation_dropout: float, Dropout probability applied after the
+             activation function in the feed-forward networks.
+         encoder_ffn_dim: int, Hidden dimension size of the feed-forward
+             networks in each layer.
+         num_encoder_layers: int, Number of encoder layers in the stack.
+         kernel_initializer: str or Initializer, optional, Initializer for
+             the kernel weights of each layer. Defaults to
+             `"glorot_uniform"`.
+         bias_initializer: str or Initializer, optional, Initializer for
+             the bias weights of each layer. Defaults to
+             `"zeros"`.
+         **kwargs: Additional keyword arguments passed to the parent class.
+     """
+
+     def __init__(
+         self,
+         normalize_before,
+         encoder_hidden_dim,
+         num_attention_heads,
+         dropout,
+         layer_norm_eps,
+         encoder_activation_function,
+         activation_dropout,
+         encoder_ffn_dim,
+         num_encoder_layers,
+         kernel_initializer="glorot_uniform",
+         bias_initializer="zeros",
+         dtype=None,
+         **kwargs,
+     ):
+         super().__init__(dtype=dtype, **kwargs)
+         self.normalize_before = normalize_before
+         self.encoder_hidden_dim = encoder_hidden_dim
+         self.num_attention_heads = num_attention_heads
+         self.dropout_rate = dropout
+         self.layer_norm_eps = layer_norm_eps
+         self.encoder_activation_function = encoder_activation_function
+         self.activation_dropout_rate = activation_dropout
+         self.encoder_ffn_dim = encoder_ffn_dim
+         self.num_encoder_layers = num_encoder_layers
+         self.kernel_initializer = kernel_initializer
+         self.bias_initializer = bias_initializer
+         self.encoder_layer = []
+         for i in range(self.num_encoder_layers):
+             layer = DFineEncoderLayer(
+                 normalize_before=self.normalize_before,
+                 encoder_hidden_dim=self.encoder_hidden_dim,
+                 num_attention_heads=self.num_attention_heads,
+                 dropout=self.dropout_rate,
+                 layer_norm_eps=self.layer_norm_eps,
+                 encoder_activation_function=self.encoder_activation_function,
+                 activation_dropout=self.activation_dropout_rate,
+                 encoder_ffn_dim=self.encoder_ffn_dim,
+                 kernel_initializer=self.kernel_initializer,
+                 bias_initializer=self.bias_initializer,
+                 dtype=self.dtype_policy,
+                 name=f"encoder_layer_{i}",
+             )
+             self.encoder_layer.append(layer)
+
+     def build(self, input_shape):
+         current_input_shape_for_layer = input_shape
+         for encoder_layer_instance in self.encoder_layer:
+             encoder_layer_instance.build(current_input_shape_for_layer)
+         super().build(input_shape)
+
+     def compute_output_spec(
+         self, src, src_mask=None, pos_embed=None, output_attentions=False
+     ):
+         if not self.encoder_layer:
+             if output_attentions:
+                 return src, None
+             return src
+         encoder_layer_output_spec = self.encoder_layer[0].compute_output_spec(
+             hidden_states=src,
+             attention_mask=src_mask,
+             position_embeddings=pos_embed,
+             output_attentions=output_attentions,
+         )
+         if output_attentions:
+             return encoder_layer_output_spec
+         return encoder_layer_output_spec
+
+     def call(
+         self,
+         src,
+         src_mask=None,
+         pos_embed=None,
+         output_attentions=False,
+         training=None,
+     ):
+         current_hidden_tensor = src
+         last_layer_attn_weights = None
+
+         for encoder_layer_instance in self.encoder_layer:
+             current_hidden_tensor, layer_attn_weights = encoder_layer_instance(
+                 hidden_states=current_hidden_tensor,
+                 attention_mask=src_mask,
+                 position_embeddings=pos_embed,
+                 output_attentions=output_attentions,
+                 training=training,
+             )
+             if output_attentions:
+                 last_layer_attn_weights = layer_attn_weights
+
+         return current_hidden_tensor, last_layer_attn_weights
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "normalize_before": self.normalize_before,
+                 "encoder_hidden_dim": self.encoder_hidden_dim,
+                 "num_attention_heads": self.num_attention_heads,
+                 "dropout": self.dropout_rate,
+                 "layer_norm_eps": self.layer_norm_eps,
+                 "encoder_activation_function": self.encoder_activation_function,
+                 "activation_dropout": self.activation_dropout_rate,
+                 "encoder_ffn_dim": self.encoder_ffn_dim,
+                 "num_encoder_layers": self.num_encoder_layers,
+                 "kernel_initializer": self.kernel_initializer,
+                 "bias_initializer": self.bias_initializer,
+             }
+         )
+         return config
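
For orientation, here is a minimal sketch (not part of the diff) of exercising the new DFineEncoderLayer on its own. It assumes a nightly build that ships the keras_hub/src/models/d_fine/ modules listed above; the import path is internal rather than a public export, and the hyperparameter values are illustrative, not taken from a D-FINE preset.

import keras

from keras_hub.src.models.d_fine.d_fine_encoder import DFineEncoderLayer

# Illustrative sizes only; the D-FINE presets define their own configuration.
layer = DFineEncoderLayer(
    normalize_before=True,  # pre-norm: LayerNorm runs before attention/FFN
    encoder_hidden_dim=256,
    num_attention_heads=8,
    dropout=0.0,
    layer_norm_eps=1e-5,
    encoder_activation_function="gelu",
    activation_dropout=0.0,
    encoder_ffn_dim=1024,
)

# A flattened feature map: a 20x20 grid of 256-dim features -> 400 tokens.
features = keras.random.normal((2, 400, 256))
outputs = layer(features, training=False)
print(outputs.shape)  # (2, 400, 256); fc2 projects back to encoder_hidden_dim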
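
A similar sketch, under the same assumptions, for the stacked DFineEncoder. Its call returns a (hidden_states, last_layer_attn_weights) pair and unpacks each layer's output into two values, so this example requests attentions explicitly.

import keras

from keras_hub.src.models.d_fine.d_fine_encoder import DFineEncoder

encoder = DFineEncoder(
    normalize_before=False,  # post-norm variant
    encoder_hidden_dim=256,
    num_attention_heads=8,
    dropout=0.0,
    layer_norm_eps=1e-5,
    encoder_activation_function="gelu",
    activation_dropout=0.0,
    encoder_ffn_dim=1024,
    num_encoder_layers=2,
)

src = keras.random.normal((2, 400, 256))
hidden, attn = encoder(src, output_attentions=True, training=False)
print(hidden.shape)  # (2, 400, 256)
# `attn` holds the attention weights returned by the last encoder layer.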