keras-hub-nightly 0.23.0.dev202508260411__py3-none-any.whl → 0.23.0.dev202508280418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. keras_hub/layers/__init__.py +6 -0
  2. keras_hub/models/__init__.py +21 -0
  3. keras_hub/src/layers/modeling/position_embedding.py +21 -6
  4. keras_hub/src/layers/modeling/rotary_embedding.py +16 -6
  5. keras_hub/src/layers/modeling/sine_position_encoding.py +21 -8
  6. keras_hub/src/layers/modeling/token_and_position_embedding.py +2 -1
  7. keras_hub/src/models/backbone.py +10 -15
  8. keras_hub/src/models/d_fine/__init__.py +0 -0
  9. keras_hub/src/models/d_fine/d_fine_attention.py +461 -0
  10. keras_hub/src/models/d_fine/d_fine_backbone.py +891 -0
  11. keras_hub/src/models/d_fine/d_fine_decoder.py +944 -0
  12. keras_hub/src/models/d_fine/d_fine_encoder.py +365 -0
  13. keras_hub/src/models/d_fine/d_fine_hybrid_encoder.py +642 -0
  14. keras_hub/src/models/d_fine/d_fine_image_converter.py +8 -0
  15. keras_hub/src/models/d_fine/d_fine_layers.py +1828 -0
  16. keras_hub/src/models/d_fine/d_fine_loss.py +938 -0
  17. keras_hub/src/models/d_fine/d_fine_object_detector.py +875 -0
  18. keras_hub/src/models/d_fine/d_fine_object_detector_preprocessor.py +14 -0
  19. keras_hub/src/models/d_fine/d_fine_presets.py +2 -0
  20. keras_hub/src/models/d_fine/d_fine_utils.py +827 -0
  21. keras_hub/src/models/hgnetv2/hgnetv2_backbone.py +4 -1
  22. keras_hub/src/models/hgnetv2/hgnetv2_encoder.py +3 -2
  23. keras_hub/src/models/hgnetv2/hgnetv2_layers.py +27 -11
  24. keras_hub/src/models/parseq/__init__.py +0 -0
  25. keras_hub/src/models/parseq/parseq_backbone.py +134 -0
  26. keras_hub/src/models/parseq/parseq_causal_lm.py +466 -0
  27. keras_hub/src/models/parseq/parseq_causal_lm_preprocessor.py +168 -0
  28. keras_hub/src/models/parseq/parseq_decoder.py +418 -0
  29. keras_hub/src/models/parseq/parseq_image_converter.py +8 -0
  30. keras_hub/src/models/parseq/parseq_tokenizer.py +221 -0
  31. keras_hub/src/tests/test_case.py +37 -1
  32. keras_hub/src/utils/preset_utils.py +49 -0
  33. keras_hub/src/utils/tensor_utils.py +23 -1
  34. keras_hub/src/utils/transformers/convert_vit.py +4 -1
  35. keras_hub/src/version.py +1 -1
  36. keras_hub/tokenizers/__init__.py +3 -0
  37. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/METADATA +1 -1
  38. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/RECORD +40 -20
  39. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/WHEEL +0 -0
  40. {keras_hub_nightly-0.23.0.dev202508260411.dist-info → keras_hub_nightly-0.23.0.dev202508280418.dist-info}/top_level.txt +0 -0
@@ -157,7 +157,10 @@ class HGNetV2Backbone(Backbone):
157
157
  if stage_name in self.out_features
158
158
  }
159
159
  super().__init__(
160
- inputs=pixel_values, outputs=feature_maps_output, **kwargs
160
+ inputs=pixel_values,
161
+ outputs=feature_maps_output,
162
+ dtype=dtype,
163
+ **kwargs,
161
164
  )
162
165
 
163
166
  # === Config ===
@@ -56,9 +56,10 @@ class HGNetV2Encoder(keras.layers.Layer):
56
56
  use_learnable_affine_block,
57
57
  data_format=None,
58
58
  channel_axis=None,
59
+ dtype=None,
59
60
  **kwargs,
60
61
  ):
61
- super().__init__(**kwargs)
62
+ super().__init__(dtype=dtype, **kwargs)
62
63
  self.stage_in_channels = stage_in_channels
63
64
  self.stage_mid_channels = stage_mid_channels
64
65
  self.stage_out_channels = stage_out_channels
@@ -90,7 +91,7 @@ class HGNetV2Encoder(keras.layers.Layer):
90
91
  name=f"{self.name}_stage_{stage_idx}"
91
92
  if self.name
92
93
  else f"stage_{stage_idx}",
93
- dtype=self.dtype,
94
+ dtype=dtype,
94
95
  )
95
96
  self.stages_list.append(stage_layer)
96
97
 
@@ -17,8 +17,8 @@ class HGNetV2LearnableAffineBlock(keras.layers.Layer):
17
17
  **kwargs: Additional keyword arguments passed to the parent class.
18
18
  """
19
19
 
20
- def __init__(self, scale_value=1.0, bias_value=0.0, **kwargs):
21
- super().__init__(**kwargs)
20
+ def __init__(self, scale_value=1.0, bias_value=0.0, dtype=None, **kwargs):
21
+ super().__init__(dtype=dtype, **kwargs)
22
22
  self.scale_value = scale_value
23
23
  self.bias_value = bias_value
24
24
 
@@ -87,9 +87,10 @@ class HGNetV2ConvLayer(keras.layers.Layer):
87
87
  use_learnable_affine_block=False,
88
88
  data_format=None,
89
89
  channel_axis=None,
90
+ dtype=None,
90
91
  **kwargs,
91
92
  ):
92
- super().__init__(**kwargs)
93
+ super().__init__(dtype=dtype, **kwargs)
93
94
  self.in_channels = in_channels
94
95
  self.out_channels = out_channels
95
96
  self.kernel_size = kernel_size
@@ -104,6 +105,7 @@ class HGNetV2ConvLayer(keras.layers.Layer):
104
105
  padding=((pad, pad), (pad, pad)),
105
106
  data_format=self.data_format,
106
107
  name=f"{self.name}_pad" if self.name else None,
108
+ dtype=self.dtype_policy,
107
109
  )
108
110
  self.convolution = keras.layers.Conv2D(
109
111
  filters=self.out_channels,
@@ -156,7 +158,8 @@ class HGNetV2ConvLayer(keras.layers.Layer):
156
158
  )
157
159
  else:
158
160
  self.lab = keras.layers.Identity(
159
- name=f"{self.name}_identity_lab" if self.name else None
161
+ name=f"{self.name}_identity_lab" if self.name else None,
162
+ dtype=self.dtype_policy,
160
163
  )
161
164
 
162
165
  def build(self, input_shape):
@@ -230,9 +233,10 @@ class HGNetV2ConvLayerLight(keras.layers.Layer):
230
233
  use_learnable_affine_block=False,
231
234
  data_format=None,
232
235
  channel_axis=None,
236
+ dtype=None,
233
237
  **kwargs,
234
238
  ):
235
- super().__init__(**kwargs)
239
+ super().__init__(dtype=dtype, **kwargs)
236
240
  self.in_channels = in_channels
237
241
  self.out_channels = out_channels
238
242
  self.kernel_size = kernel_size
@@ -327,9 +331,10 @@ class HGNetV2Embeddings(keras.layers.Layer):
327
331
  use_learnable_affine_block,
328
332
  data_format=None,
329
333
  channel_axis=None,
334
+ dtype=None,
330
335
  **kwargs,
331
336
  ):
332
- super().__init__(**kwargs)
337
+ super().__init__(dtype=dtype, **kwargs)
333
338
  self.stem_channels = stem_channels
334
339
  self.hidden_act = hidden_act
335
340
  self.use_learnable_affine_block = use_learnable_affine_block
@@ -352,6 +357,7 @@ class HGNetV2Embeddings(keras.layers.Layer):
352
357
  padding=((0, 1), (0, 1)),
353
358
  data_format=self.data_format,
354
359
  name=f"{self.name}_padding1" if self.name else "padding1",
360
+ dtype=self.dtype_policy,
355
361
  )
356
362
  self.stem2a_layer = HGNetV2ConvLayer(
357
363
  in_channels=self.stem_channels[1],
@@ -370,6 +376,7 @@ class HGNetV2Embeddings(keras.layers.Layer):
370
376
  padding=((0, 1), (0, 1)),
371
377
  data_format=self.data_format,
372
378
  name=f"{self.name}_padding2" if self.name else "padding2",
379
+ dtype=self.dtype_policy,
373
380
  )
374
381
  self.stem2b_layer = HGNetV2ConvLayer(
375
382
  in_channels=self.stem_channels[1] // 2,
@@ -390,10 +397,12 @@ class HGNetV2Embeddings(keras.layers.Layer):
390
397
  padding="valid",
391
398
  data_format=self.data_format,
392
399
  name=f"{self.name}_pool" if self.name else "pool",
400
+ dtype=self.dtype_policy,
393
401
  )
394
402
  self.concatenate_layer = keras.layers.Concatenate(
395
403
  axis=self.channel_axis,
396
404
  name=f"{self.name}_concat" if self.name else "concat",
405
+ dtype=self.dtype_policy,
397
406
  )
398
407
  self.stem3_layer = HGNetV2ConvLayer(
399
408
  in_channels=self.stem_channels[1] * 2,
@@ -550,9 +559,10 @@ class HGNetV2BasicLayer(keras.layers.Layer):
550
559
  use_learnable_affine_block=False,
551
560
  data_format=None,
552
561
  channel_axis=None,
562
+ dtype=None,
553
563
  **kwargs,
554
564
  ):
555
- super().__init__(**kwargs)
565
+ super().__init__(dtype=dtype, **kwargs)
556
566
  self.in_channels_arg = in_channels
557
567
  self.middle_channels = middle_channels
558
568
  self.out_channels = out_channels
@@ -635,23 +645,27 @@ class HGNetV2BasicLayer(keras.layers.Layer):
635
645
  self.drop_path_rate,
636
646
  noise_shape=(None, 1, 1, 1),
637
647
  name=f"{self.name}_drop_path" if self.name else "drop_path",
648
+ dtype=self.dtype_policy,
638
649
  )
639
650
  else:
640
651
  self.drop_path_layer = keras.layers.Identity(
641
652
  name=f"{self.name}_identity_drop_path"
642
653
  if self.name
643
- else "identity_drop_path"
654
+ else "identity_drop_path",
655
+ dtype=self.dtype_policy,
644
656
  )
645
657
 
646
658
  self.concatenate_layer = keras.layers.Concatenate(
647
659
  axis=self.channel_axis,
648
660
  name=f"{self.name}_concat" if self.name else "concat",
661
+ dtype=self.dtype_policy,
649
662
  )
650
663
  if self.residual:
651
664
  self.add_layer = keras.layers.Add(
652
665
  name=f"{self.name}_add_residual"
653
666
  if self.name
654
- else "add_residual"
667
+ else "add_residual",
668
+ dtype=self.dtype_policy,
655
669
  )
656
670
 
657
671
  def build(self, input_shape):
@@ -794,9 +808,10 @@ class HGNetV2Stage(keras.layers.Layer):
794
808
  drop_path: float = 0.0,
795
809
  data_format=None,
796
810
  channel_axis=None,
811
+ dtype=None,
797
812
  **kwargs,
798
813
  ):
799
- super().__init__(**kwargs)
814
+ super().__init__(dtype=dtype, **kwargs)
800
815
  self.stage_in_channels = stage_in_channels
801
816
  self.stage_mid_channels = stage_mid_channels
802
817
  self.stage_out_channels = stage_out_channels
@@ -842,7 +857,8 @@ class HGNetV2Stage(keras.layers.Layer):
842
857
  self.downsample_layer = keras.layers.Identity(
843
858
  name=f"{self.name}_identity_downsample"
844
859
  if self.name
845
- else "identity_downsample"
860
+ else "identity_downsample",
861
+ dtype=self.dtype_policy,
846
862
  )
847
863
 
848
864
  self.blocks_list = []
File without changes
@@ -0,0 +1,134 @@
1
+ import keras
2
+
3
+ from keras_hub.src.api_export import keras_hub_export
4
+ from keras_hub.src.models.backbone import Backbone
5
+ from keras_hub.src.models.parseq.parseq_decoder import PARSeqDecoder
6
+
7
+
8
@keras_hub_export("keras_hub.models.PARSeqBackbone")
class PARSeqBackbone(Backbone):
    """Scene Text Detection with PARSeq.

    Performs OCR in natural scenes using the PARSeq model described in [Scene
    Text Recognition with Permuted Autoregressive Sequence Models](
    https://arxiv.org/abs/2207.06966). PARSeq is a ViT-based model that allows
    iterative decoding by performing an autoregressive decoding phase, followed
    by a refinement phase.

    Args:
        image_encoder: keras.Model. The image encoder model.
        vocabulary_size: int. The size of the vocabulary.
        max_label_length: int. The maximum length of the label sequence.
        decoder_hidden_dim: int. The dimension of the decoder hidden layers.
        num_decoder_layers: int. The number of decoder layers.
        num_decoder_heads: int. The number of attention heads in the decoder.
        decoder_mlp_dim: int. The dimension of the decoder MLP hidden layer.
        dropout_rate: float. The dropout rate for the decoder network.
            Defaults to `0.1`.
        attention_dropout: float. The dropout rate for the attention weights.
            Defaults to `0.1`.
        dtype: str. `None`, str, or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the computations and weights.
        **kwargs: Additional keyword arguments passed to the base
            `keras.Model` constructor.
    """

    def __init__(
        self,
        image_encoder,
        vocabulary_size,
        max_label_length,
        decoder_hidden_dim,
        num_decoder_layers,
        num_decoder_heads,
        decoder_mlp_dim,
        dropout_rate=0.1,
        attention_dropout=0.1,
        dtype=None,
        **kwargs,
    ):
        # === Layers ===
        self.image_encoder = image_encoder
        self.decoder = PARSeqDecoder(
            vocabulary_size=vocabulary_size,
            max_label_length=max_label_length,
            num_layers=num_decoder_layers,
            num_heads=num_decoder_heads,
            hidden_dim=decoder_hidden_dim,
            mlp_dim=decoder_mlp_dim,
            dropout_rate=dropout_rate,
            attention_dropout=attention_dropout,
            name="decoder",
            dtype=dtype,
        )
        self.head = keras.layers.Dense(
            vocabulary_size - 2,  # We don't predict <bos> nor <pad>
            dtype=dtype,
        )

        # === Functional Model ===
        # Reuse the encoder's own symbolic input so encoder and backbone
        # share a single graph.
        image_input = self.image_encoder.input

        token_id_input = keras.Input(
            shape=(None,), dtype="int32", name="token_ids"
        )
        padding_mask_input = keras.Input(
            shape=(None,), dtype="int32", name="padding_mask"
        )

        memory = self.image_encoder(image_input)
        target_out = self.decoder(
            token_id_input, memory, padding_mask=padding_mask_input
        )
        logits = self.head(target_out)

        # === Config ===
        self.vocabulary_size = vocabulary_size
        self.max_label_length = max_label_length
        self.decoder_hidden_dim = decoder_hidden_dim
        self.num_decoder_layers = num_decoder_layers
        self.num_decoder_heads = num_decoder_heads
        self.decoder_mlp_dim = decoder_mlp_dim
        self.dropout_rate = dropout_rate
        self.attention_dropout = attention_dropout

        super().__init__(
            inputs={
                "images": image_input,
                "token_ids": token_id_input,
                "padding_mask": padding_mask_input,
            },
            outputs=logits,
            dtype=dtype,
            **kwargs,
        )

    def get_config(self):
        """Return the serializable config, with the nested encoder serialized."""
        config = super().get_config()
        config.update(
            {
                # The encoder is a full keras.Model; serialize it explicitly
                # so `from_config` can rebuild it.
                "image_encoder": keras.layers.serialize(self.image_encoder),
                "vocabulary_size": self.vocabulary_size,
                "max_label_length": self.max_label_length,
                "decoder_hidden_dim": self.decoder_hidden_dim,
                "num_decoder_layers": self.num_decoder_layers,
                "num_decoder_heads": self.num_decoder_heads,
                "decoder_mlp_dim": self.decoder_mlp_dim,
                "dropout_rate": self.dropout_rate,
                "attention_dropout": self.attention_dropout,
            }
        )

        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the backbone from a config dict.

        Deserializes the nested `image_encoder` model before delegating to
        the base class.
        """
        # Copy first: mutating the caller's dict in place would corrupt a
        # config the caller may reuse (e.g. to build a second instance).
        config = dict(config)
        config["image_encoder"] = keras.layers.deserialize(
            config["image_encoder"]
        )

        return super().from_config(config)