keras-hub-nightly 0.22.0.dev202505290412__py3-none-any.whl → 0.22.0.dev202505310408__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. keras_hub/layers/__init__.py +3 -0
  2. keras_hub/models/__init__.py +16 -0
  3. keras_hub/src/models/deit/__init__.py +0 -0
  4. keras_hub/src/models/deit/deit_backbone.py +154 -0
  5. keras_hub/src/models/deit/deit_image_classifier.py +171 -0
  6. keras_hub/src/models/deit/deit_image_classifier_preprocessor.py +12 -0
  7. keras_hub/src/models/deit/deit_image_converter.py +8 -0
  8. keras_hub/src/models/deit/deit_layers.py +519 -0
  9. keras_hub/src/models/deit/deit_presets.py +49 -0
  10. keras_hub/src/models/mixtral/mixtral_presets.py +4 -4
  11. keras_hub/src/models/qwen/qwen_presets.py +6 -6
  12. keras_hub/src/models/qwen3/qwen3_attention.py +369 -0
  13. keras_hub/src/models/qwen3/qwen3_backbone.py +191 -0
  14. keras_hub/src/models/qwen3/qwen3_causal_lm_preprocessor.py +10 -0
  15. keras_hub/src/models/qwen3/qwen3_decoder.py +309 -0
  16. keras_hub/src/models/qwen3/qwen3_layernorm.py +38 -0
  17. keras_hub/src/models/qwen3/qwen3_tokenizer.py +48 -0
  18. keras_hub/src/models/qwen_moe/qwen_moe_presets.py +2 -2
  19. keras_hub/src/utils/transformers/convert_deit.py +155 -0
  20. keras_hub/src/utils/transformers/convert_qwen3.py +145 -0
  21. keras_hub/src/utils/transformers/preset_loader.py +7 -1
  22. keras_hub/src/version.py +1 -1
  23. {keras_hub_nightly-0.22.0.dev202505290412.dist-info → keras_hub_nightly-0.22.0.dev202505310408.dist-info}/METADATA +1 -1
  24. {keras_hub_nightly-0.22.0.dev202505290412.dist-info → keras_hub_nightly-0.22.0.dev202505310408.dist-info}/RECORD +26 -11
  25. {keras_hub_nightly-0.22.0.dev202505290412.dist-info → keras_hub_nightly-0.22.0.dev202505310408.dist-info}/WHEEL +0 -0
  26. {keras_hub_nightly-0.22.0.dev202505290412.dist-info → keras_hub_nightly-0.22.0.dev202505310408.dist-info}/top_level.txt +0 -0
@@ -78,6 +78,9 @@ from keras_hub.src.models.cspnet.cspnet_image_converter import (
78
78
  from keras_hub.src.models.deeplab_v3.deeplab_v3_image_converter import (
79
79
  DeepLabV3ImageConverter as DeepLabV3ImageConverter,
80
80
  )
81
+ from keras_hub.src.models.deit.deit_image_converter import (
82
+ DeiTImageConverter as DeiTImageConverter,
83
+ )
81
84
  from keras_hub.src.models.densenet.densenet_image_converter import (
82
85
  DenseNetImageConverter as DenseNetImageConverter,
83
86
  )
@@ -141,6 +141,13 @@ from keras_hub.src.models.deeplab_v3.deeplab_v3_image_segmeter_preprocessor impo
141
141
  from keras_hub.src.models.deeplab_v3.deeplab_v3_segmenter import (
142
142
  DeepLabV3ImageSegmenter as DeepLabV3ImageSegmenter,
143
143
  )
144
+ from keras_hub.src.models.deit.deit_backbone import DeiTBackbone as DeiTBackbone
145
+ from keras_hub.src.models.deit.deit_image_classifier import (
146
+ DeiTImageClassifier as DeiTImageClassifier,
147
+ )
148
+ from keras_hub.src.models.deit.deit_image_classifier_preprocessor import (
149
+ DeiTImageClassifierPreprocessor as DeiTImageClassifierPreprocessor,
150
+ )
144
151
  from keras_hub.src.models.densenet.densenet_backbone import (
145
152
  DenseNetBackbone as DenseNetBackbone,
146
153
  )
@@ -444,6 +451,15 @@ from keras_hub.src.models.qwen.qwen_tokenizer import (
444
451
  from keras_hub.src.models.qwen.qwen_tokenizer import (
445
452
  QwenTokenizer as QwenTokenizer,
446
453
  )
454
+ from keras_hub.src.models.qwen3.qwen3_backbone import (
455
+ Qwen3Backbone as Qwen3Backbone,
456
+ )
457
+ from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
458
+ Qwen3CausalLMPreprocessor as Qwen3CausalLMPreprocessor,
459
+ )
460
+ from keras_hub.src.models.qwen3.qwen3_tokenizer import (
461
+ Qwen3Tokenizer as Qwen3Tokenizer,
462
+ )
447
463
  from keras_hub.src.models.qwen_moe.qwen_moe_backbone import (
448
464
  QwenMoeBackbone as QwenMoeBackbone,
449
465
  )
File without changes
@@ -0,0 +1,154 @@
1
+ import keras
2
+
3
+ from keras_hub.src.api_export import keras_hub_export
4
+ from keras_hub.src.models.backbone import Backbone
5
+ from keras_hub.src.models.deit.deit_layers import DeiTEmbeddings
6
+ from keras_hub.src.models.deit.deit_layers import DeiTEncoder
7
+ from keras_hub.src.utils.keras_utils import standardize_data_format
8
+
9
+
10
@keras_hub_export("keras_hub.models.DeiTBackbone")
class DeiTBackbone(Backbone):
    """DeiT backbone.

    This backbone implements the Data-efficient Image Transformer (DeiT)
    architecture as described in [Training data-efficient image
    transformers & distillation through attention]
    (https://arxiv.org/abs/2012.12877).

    Args:
        image_shape: A tuple or list of 3 integers representing the shape of
            the input image. The position of the height, width and channel
            entries follows `data_format`.
        patch_size: tuple or int. The size of each image patch. If an int is
            provided, it will be used for both height and width. The input
            image will be split into patches of shape
            `(patch_size_h, patch_size_w)`.
        num_layers: int. The number of transformer encoder layers.
        num_heads: int. The number of attention heads in each Transformer
            encoder layer.
        hidden_dim: int. The dimensionality of the hidden representations.
        intermediate_dim: int. The dimensionality of the intermediate MLP
            layer in each Transformer encoder layer.
        dropout_rate: float. The dropout rate for the embedding and
            Transformer encoder layers.
        attention_dropout: float. The dropout rate for the attention
            mechanism in each Transformer encoder layer.
        layer_norm_epsilon: float. Value used for numerical stability in
            layer normalization.
        use_mha_bias: bool. Whether to use bias in the multi-head attention
            layers.
        data_format: str. `"channels_last"` or `"channels_first"`, specifying
            the data format for the input image. If `None`, the value is
            resolved by `standardize_data_format`.
        dtype: The dtype of the layer weights. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to the parent
            `Backbone` class.

    Raises:
        ValueError: If the height or width entry of `image_shape` is `None`,
            or if either one is not divisible by the matching entry of
            `patch_size`.
    """

    def __init__(
        self,
        image_shape,
        patch_size,
        num_layers,
        num_heads,
        hidden_dim,
        intermediate_dim,
        dropout_rate=0.0,
        attention_dropout=0.0,
        layer_norm_epsilon=1e-6,
        use_mha_bias=True,
        data_format=None,
        dtype=None,
        **kwargs,
    ):
        # === Layers ===
        data_format = standardize_data_format(data_format)
        if isinstance(patch_size, int):
            patch_size = (patch_size, patch_size)
        # Negative indices locate height/width/channels inside `image_shape`
        # for the resolved data format.
        h_axis, w_axis, channels_axis = (
            (-3, -2, -1) if data_format == "channels_last" else (-2, -1, -3)
        )
        # Check that the input image is well specified.
        if image_shape[h_axis] is None or image_shape[w_axis] is None:
            raise ValueError(
                f"Image shape must have defined height and width. Found `None` "
                f"at index {h_axis} (height) or {w_axis} (width). "
                f"Image shape: {image_shape}"
            )
        # Check that the image dimensions are divisible by the patch size.
        if image_shape[h_axis] % patch_size[0] != 0:
            raise ValueError(
                f"Input height {image_shape[h_axis]} should be divisible by "
                f"patch size {patch_size}."
            )
        if image_shape[w_axis] % patch_size[1] != 0:
            # This branch validates the width, so the message must say so.
            raise ValueError(
                f"Input width {image_shape[w_axis]} should be divisible by "
                f"patch size {patch_size}."
            )

        num_channels = image_shape[channels_axis]

        # === Functional Model ===
        inputs = keras.layers.Input(shape=image_shape)

        x = DeiTEmbeddings(
            image_size=(image_shape[h_axis], image_shape[w_axis]),
            patch_size=patch_size,
            hidden_dim=hidden_dim,
            num_channels=num_channels,
            data_format=data_format,
            dropout_rate=dropout_rate,
            dtype=dtype,
            name="deit_patching_and_embedding",
        )(inputs)

        # The encoder call yields three values; only the first one is exposed
        # as the backbone output.
        output, _, _ = DeiTEncoder(
            num_layers=num_layers,
            num_heads=num_heads,
            hidden_dim=hidden_dim,
            intermediate_dim=intermediate_dim,
            use_mha_bias=use_mha_bias,
            dropout_rate=dropout_rate,
            attention_dropout=attention_dropout,
            layer_norm_epsilon=layer_norm_epsilon,
            dtype=dtype,
            name="deit_encoder",
        )(x)

        super().__init__(
            inputs=inputs,
            outputs=output,
            dtype=dtype,
            **kwargs,
        )

        # === Config ===
        self.image_shape = image_shape
        self.patch_size = patch_size
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.hidden_dim = hidden_dim
        self.intermediate_dim = intermediate_dim
        self.dropout_rate = dropout_rate
        self.attention_dropout = attention_dropout
        self.layer_norm_epsilon = layer_norm_epsilon
        self.use_mha_bias = use_mha_bias
        self.data_format = data_format

    def get_config(self):
        """Return the constructor arguments needed to rebuild this backbone.

        NOTE(review): `data_format` is stored on the instance but is not
        written to the config here — confirm this is intentional.
        """
        config = super().get_config()
        config.update(
            {
                "image_shape": self.image_shape,
                "patch_size": self.patch_size,
                "num_layers": self.num_layers,
                "num_heads": self.num_heads,
                "hidden_dim": self.hidden_dim,
                "intermediate_dim": self.intermediate_dim,
                "dropout_rate": self.dropout_rate,
                "attention_dropout": self.attention_dropout,
                "layer_norm_epsilon": self.layer_norm_epsilon,
                "use_mha_bias": self.use_mha_bias,
            }
        )
        return config
@@ -0,0 +1,171 @@
1
+ import keras
2
+ from keras import ops
3
+
4
+ from keras_hub.src.api_export import keras_hub_export
5
+ from keras_hub.src.models.deit.deit_backbone import DeiTBackbone
6
+ from keras_hub.src.models.deit.deit_image_classifier_preprocessor import (
7
+ DeiTImageClassifierPreprocessor,
8
+ )
9
+ from keras_hub.src.models.image_classifier import ImageClassifier
10
+ from keras_hub.src.models.task import Task
11
+
12
+
13
@keras_hub_export("keras_hub.models.DeiTImageClassifier")
class DeiTImageClassifier(ImageClassifier):
    """DeiT image classification task.

    `DeiTImageClassifier` tasks wrap a `keras_hub.models.DeiTBackbone` and
    a `keras_hub.models.Preprocessor` to create a model that can be used for
    image classification. `DeiTImageClassifier` tasks take an additional
    `num_classes` argument, controlling the number of predicted output classes.

    To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
    where `x` is a batch of images and `y` is an integer from
    `[0, num_classes)`.

    Note that unlike `keras_hub.models.ImageClassifier`, with the default
    `pooling="token"` the `DeiTImageClassifier` plucks out the `cls_token`,
    which is the first element of the sequence returned by the backbone.

    Args:
        backbone: A `keras_hub.models.DeiTBackbone` instance or a `keras.Model`.
        num_classes: int. The number of classes to predict.
        preprocessor: `None`, a `keras_hub.models.Preprocessor` instance,
            a `keras.Layer` instance, or a callable. If `None` no preprocessing
            will be applied to the inputs.
        pooling: String specifying the classification strategy. The choice
            impacts the dimensionality and nature of the feature vector used for
            classification.
                `"token"`: A single vector (class token) representing the
                    overall image features.
                `"gap"`: A single vector representing the average features
                    across the spatial dimensions.
        activation: `None`, str, or callable. The activation function to use on
            the `Dense` layer. Set `activation=None` to return the output
            logits. Defaults to `None`.
        dropout: float. Dropout rate applied to the pooled features before
            the classification `Dense` layer. Defaults to `0.0`.
        head_dtype: `None`, str, or `keras.mixed_precision.DTypePolicy`. The
            dtype to use for the classification head's computations and weights.
            If `None`, the backbone's dtype policy is used.

    Examples:

    Call `predict()` to run inference.
    ```python
    # Load preset and run inference
    images = np.random.randint(0, 256, size=(2, 384, 384, 3))
    classifier = keras_hub.models.DeiTImageClassifier.from_preset(
        "hf://facebook/deit-base-distilled-patch16-384"
    )
    classifier.predict(images)
    ```

    Call `fit()` on a single batch.
    ```python
    # Load preset and train
    images = np.random.randint(0, 256, size=(2, 384, 384, 3))
    labels = [0, 3]
    classifier = keras_hub.models.DeiTImageClassifier.from_preset(
        "hf://facebook/deit-base-distilled-patch16-384"
    )
    classifier.fit(x=images, y=labels, batch_size=2)
    ```

    Call `fit()` with custom loss, optimizer and backbone.
    ```python
    images = np.random.randint(0, 256, size=(2, 384, 384, 3))
    labels = [0, 3]
    classifier = keras_hub.models.DeiTImageClassifier.from_preset(
        "hf://facebook/deit-base-distilled-patch16-384"
    )
    classifier.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.Adam(5e-5),
    )
    classifier.backbone.trainable = False
    classifier.fit(x=images, y=labels, batch_size=2)
    ```

    Custom backbone.
    ```python
    images = np.random.randint(0, 256, size=(2, 384, 384, 3))
    labels = [0, 3]
    backbone = keras_hub.models.DeiTBackbone(
        image_shape=(384, 384, 3),
        patch_size=16,
        num_layers=6,
        num_heads=3,
        hidden_dim=768,
        intermediate_dim=2048,
    )
    classifier = keras_hub.models.DeiTImageClassifier(
        backbone=backbone,
        num_classes=4,
    )
    classifier.fit(x=images, y=labels, batch_size=2)
    ```
    """

    backbone_cls = DeiTBackbone
    preprocessor_cls = DeiTImageClassifierPreprocessor

    def __init__(
        self,
        backbone,
        num_classes,
        preprocessor=None,
        pooling="token",
        activation=None,
        dropout=0.0,
        head_dtype=None,
        **kwargs,
    ):
        head_dtype = head_dtype or backbone.dtype_policy

        # === Layers ===
        self.backbone = backbone
        self.preprocessor = preprocessor
        self.dropout = keras.layers.Dropout(
            rate=dropout,
            dtype=head_dtype,
            name="output_dropout",
        )

        self.output_dense = keras.layers.Dense(
            num_classes,
            activation=activation,
            dtype=head_dtype,
            name="predictions",
        )

        # === Functional Model ===
        inputs = self.backbone.input
        x = self.backbone(inputs)
        if pooling == "token":
            # Keep only the first sequence position (the class token).
            x = x[:, 0]
        elif pooling == "gap":
            # Average over every axis between batch and features.
            ndim = len(ops.shape(x))
            x = ops.mean(x, axis=list(range(1, ndim - 1)))  # (1,) or (1,2)
        # NOTE(review): any other `pooling` value leaves `x` un-pooled, so
        # the head is applied to the whole backbone output — confirm whether
        # this should raise instead.

        # Apply the head dropout. The layer was previously constructed but
        # never called, which made the `dropout` argument a silent no-op.
        x = self.dropout(x)
        outputs = self.output_dense(x)

        # Skip the parent class functional model.
        Task.__init__(
            self,
            inputs=inputs,
            outputs=outputs,
            **kwargs,
        )

        # === config ===
        self.num_classes = num_classes
        self.pooling = pooling
        self.activation = activation
        # From here on `self.dropout` holds the float rate (read by
        # `get_config`); the Dropout layer itself stays part of the
        # functional graph built above.
        self.dropout = dropout

    def get_config(self):
        """Return the head configuration on top of the parent task config."""
        # Backbone serialized in `super`
        config = super().get_config()
        config.update(
            {
                "num_classes": self.num_classes,
                "pooling": self.pooling,
                "activation": self.activation,
                "dropout": self.dropout,
            }
        )
        return config
@@ -0,0 +1,12 @@
1
+ from keras_hub.src.api_export import keras_hub_export
2
+ from keras_hub.src.models.deit.deit_backbone import DeiTBackbone
3
+ from keras_hub.src.models.deit.deit_image_converter import DeiTImageConverter
4
+ from keras_hub.src.models.image_classifier_preprocessor import (
5
+ ImageClassifierPreprocessor,
6
+ )
7
+
8
+
9
@keras_hub_export("keras_hub.models.DeiTImageClassifierPreprocessor")
class DeiTImageClassifierPreprocessor(ImageClassifierPreprocessor):
    """Image classifier preprocessor registered for the DeiT model family.

    The class adds no behavior of its own; it only binds the generic
    `ImageClassifierPreprocessor` to `DeiTBackbone` and `DeiTImageConverter`
    through the two class attributes below.
    """

    backbone_cls = DeiTBackbone
    image_converter_cls = DeiTImageConverter
@@ -0,0 +1,8 @@
1
+ from keras_hub.src.api_export import keras_hub_export
2
+ from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
3
+ from keras_hub.src.models.deit.deit_backbone import DeiTBackbone
4
+
5
+
6
@keras_hub_export("keras_hub.layers.DeiTImageConverter")
class DeiTImageConverter(ImageConverter):
    """Image converter registered for the DeiT model family.

    The class adds no behavior of its own; it only binds the generic
    `ImageConverter` to `DeiTBackbone` through `backbone_cls`.
    """

    backbone_cls = DeiTBackbone