keras-hub-nightly 0.23.0.dev202509190415__py3-none-any.whl → 0.23.0.dev202509290422__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of keras-hub-nightly might be problematic.

Files changed (32)
  1. keras_hub/layers/__init__.py +3 -0
  2. keras_hub/models/__init__.py +24 -0
  3. keras_hub/src/models/depth_anything/__init__.py +9 -0
  4. keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
  5. keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
  6. keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
  7. keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
  8. keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
  9. keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
  10. keras_hub/src/models/depth_anything/depth_anything_presets.py +4 -0
  11. keras_hub/src/models/depth_anything/interpolate.py +62 -0
  12. keras_hub/src/models/depth_estimator.py +239 -0
  13. keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
  14. keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
  15. keras_hub/src/models/dinov2/dinov2_layers.py +13 -3
  16. keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
  17. keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
  18. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
  19. keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
  20. keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
  21. keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
  22. keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
  23. keras_hub/src/tests/test_case.py +3 -2
  24. keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
  25. keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
  26. keras_hub/src/utils/transformers/preset_loader.py +3 -0
  27. keras_hub/src/version.py +1 -1
  28. keras_hub/tokenizers/__init__.py +3 -0
  29. {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/METADATA +1 -1
  30. {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/RECORD +32 -13
  31. {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/WHEEL +0 -0
  32. {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/top_level.txt +0 -0
keras_hub/src/models/depth_anything/depth_anything_loss.py
@@ -0,0 +1,89 @@
+ import keras
+ from keras import ops
+ from keras.src.losses.losses import LossFunctionWrapper
+
+
+ class DepthAnythingLoss(LossFunctionWrapper):
+     """Computes the DepthAnything loss between `y_true` & `y_pred`.
+
+     This loss is the Scale-Invariant Logarithmic (SiLog) loss, which is
+     widely used for depth estimation tasks.
+
+     See: [Depth Map Prediction from a Single Image using a Multi-Scale Deep Network](https://arxiv.org/abs/1406.2283)
+
+     Args:
+         lambd: The weighting factor in the scale-invariant log loss formula.
+             Defaults to `0.5`.
+         min_depth: Minimum depth value used to filter `y_pred` and `y_true`.
+             Defaults to `keras.config.epsilon()`.
+         max_depth: Optional maximum depth value used to filter `y_pred` and
+             `y_true`. If not specified, there will be no upper bound.
+         reduction: Type of reduction to apply to the loss. In almost all cases
+             this should be `"sum_over_batch_size"`. Supported options are
+             `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+             `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss,
+             `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+             sample size, and `"mean_with_sample_weight"` sums the loss and
+             divides by the sum of the sample weights. `"none"` and `None`
+             perform no aggregation. Defaults to `"sum_over_batch_size"`.
+         name: Optional name for the instance.
+         dtype: The dtype of the loss's computations. Defaults to `None`, which
+             means using `keras.backend.floatx()`. `keras.backend.floatx()` is a
+             `"float32"` unless set to different value
+             (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
+             provided, then the `compute_dtype` will be utilized.
+     """
+
+     def __init__(
+         self,
+         lambd=0.5,
+         min_depth=keras.config.epsilon(),
+         max_depth=None,
+         reduction="sum_over_batch_size",
+         name="depth_anything_loss",
+         dtype=None,
+     ):
+         super().__init__(
+             silog,
+             name=name,
+             reduction=reduction,
+             dtype=dtype,
+             lambd=lambd,
+             min_depth=min_depth,
+             max_depth=max_depth,
+         )
+
+
+ def silog(
+     y_true, y_pred, lambd=0.5, min_depth=keras.config.epsilon(), max_depth=None
+ ):
+     y_pred = ops.convert_to_tensor(y_pred)
+     y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype)
+
+     # Apply the valid mask.
+     if max_depth is None:
+         valid_mask = ops.greater_equal(y_true, min_depth)
+     else:
+         valid_mask = ops.logical_and(
+             ops.greater_equal(y_true, min_depth),
+             ops.less_equal(y_true, max_depth),
+         )
+     y_true = ops.multiply(y_true, valid_mask)
+     y_pred = ops.multiply(y_pred, valid_mask)
+
+     diff_log = ops.where(
+         valid_mask,
+         ops.subtract(ops.log(y_true), ops.log(y_pred)),
+         ops.zeros_like(y_true),
+     )
+
+     divisor = ops.sum(ops.cast(valid_mask, y_true.dtype), axis=(1, 2, 3))
+     mean_power2_diff_log = ops.divide_no_nan(
+         ops.sum(ops.power(diff_log, 2), axis=(1, 2, 3)), divisor
+     )
+     power2_mean_diff_log = ops.power(
+         ops.divide_no_nan(ops.sum(diff_log, axis=(1, 2, 3)), divisor), 2
+     )
+     return ops.sqrt(
+         mean_power2_diff_log - ops.multiply(lambd, power2_mean_diff_log)
+     )
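
For reference, `silog` computes the scale-invariant log error `sqrt(mean(d**2) - lambd * mean(d)**2)` with `d = log(y_true) - log(y_pred)`, taken only over pixels inside `[min_depth, max_depth]`. A minimal sketch of exercising the new loss directly, assuming the module path shown in the file list above (shapes and values are illustrative):

```python
import numpy as np

from keras_hub.src.models.depth_anything.depth_anything_loss import (
    DepthAnythingLoss,
)

# Toy (batch, height, width, channels) depth maps; strictly positive values
# so every pixel passes the `min_depth` validity mask.
y_true = np.random.uniform(0.1, 10.0, size=(2, 8, 8, 1)).astype("float32")
y_pred = np.random.uniform(0.1, 10.0, size=(2, 8, 8, 1)).astype("float32")

# SiLog loss with the default lambd=0.5; max_depth bounds the valid range.
loss_fn = DepthAnythingLoss(lambd=0.5, max_depth=10.0)
print(float(loss_fn(y_true, y_pred)))
```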
keras_hub/src/models/depth_anything/depth_anything_presets.py
@@ -0,0 +1,4 @@
+ """DepthAnything model preset configurations."""
+
+ # Metadata for loading pretrained model weights.
+ backbone_presets = {}
keras_hub/src/models/depth_anything/interpolate.py
@@ -0,0 +1,62 @@
+ from keras import backend
+ from keras import ops
+
+ from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+ def interpolate(x, size, data_format=None):
+     """Performs a backend-agnostic version of Torch's `F.interpolate`.
+
+     Args:
+         x: A 4D image tensor.
+         size: A tuple of 2 integers, `(height, width)`.
+         data_format: One of `channels_last` or `channels_first`.
+     """
+     data_format = standardize_data_format(data_format)
+     if backend.backend() == "jax":
+         import jax
+
+         if data_format == "channels_first":
+             x = ops.transpose(x, (0, 2, 3, 1))
+         scale = ops.convert_to_tensor(
+             [
+                 (size[0] - 1.0) / (x.shape[1] - 1.0),
+                 (size[1] - 1.0) / (x.shape[2] - 1.0),
+             ]
+         )
+         translation = -(scale / 2.0 - 0.5)
+         x = jax.image.scale_and_translate(
+             x,
+             (x.shape[0], *size, x.shape[-1]),
+             method="bilinear",
+             scale=scale,
+             spatial_dims=(1, 2),
+             translation=translation,
+             antialias=False,
+         )
+         if data_format == "channels_first":
+             x = ops.transpose(x, (0, 3, 1, 2))
+     elif backend.backend() == "tensorflow":
+         import tensorflow as tf
+
+         if data_format == "channels_first":
+             x = ops.transpose(x, (0, 2, 3, 1))
+         x = tf.compat.v1.image.resize(
+             x,
+             size=size,
+             method="bilinear",
+             align_corners=True,
+         )
+         if data_format == "channels_first":
+             x = ops.transpose(x, (0, 3, 1, 2))
+     elif backend.backend() == "torch":
+         import torch.nn.functional as F
+
+         if data_format == "channels_last":
+             x = ops.transpose(x, (0, 3, 1, 2))
+         x = F.interpolate(x, size=size, mode="bilinear", align_corners=True)
+         if data_format == "channels_last":
+             x = ops.transpose(x, (0, 2, 3, 1))
+     else:
+         raise NotImplementedError(f"Unsupported backend: {backend.backend()}")
+     return x
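
This helper exists so the DepthAnything layers can resize feature maps identically on every backend, mimicking `torch.nn.functional.interpolate` with `align_corners=True` bilinear resizing. A short usage sketch (shapes are illustrative):

```python
import numpy as np
from keras import ops

from keras_hub.src.models.depth_anything.interpolate import interpolate

# A batch of two 16x16 feature maps with 8 channels, channels_last layout.
x = ops.convert_to_tensor(np.random.rand(2, 16, 16, 8).astype("float32"))

# Upsample to 64x64; the jax/tensorflow/torch branches are chosen
# internally, so the call site stays backend-agnostic.
y = interpolate(x, size=(64, 64), data_format="channels_last")
print(y.shape)  # (2, 64, 64, 8)
```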
keras_hub/src/models/depth_estimator.py
@@ -0,0 +1,239 @@
+ import keras
+
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.task import Task
+
+
+ class Multiplier(keras.layers.Layer):
+     def __init__(self, multiplier=None, **kwargs):
+         super().__init__(**kwargs)
+         self.multiplier = float(multiplier) if multiplier is not None else None
+
+     def call(self, inputs):
+         if self.multiplier is not None:
+             inputs = keras.ops.multiply(inputs, self.multiplier)
+         return inputs
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "multiplier": self.multiplier,
+             }
+         )
+         return config
+
+
+ @keras_hub_export("keras_hub.models.DepthEstimator")
+ class DepthEstimator(Task):
+     """Base class for all depth estimation tasks.
+
+     `DepthEstimator` tasks wrap a `keras_hub.models.Backbone` and
+     a `keras_hub.models.Preprocessor` to create a model that can be used for
+     depth estimation.
+
+     To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
+     labels where `x` is a RGB image and `y` is a depth map. All `DepthEstimator`
+     tasks include a `from_preset()` constructor which can be used to load a
+     pre-trained config and weights.
+
+     Args:
+         backbone: A `keras_hub.models.Backbone` instance or a `keras.Model`.
+         preprocessor: `None`, a `keras_hub.models.Preprocessor` instance,
+             a `keras.Layer` instance, or a callable. If `None` no preprocessing
+             will be applied to the inputs.
+         depth_estimation_type: `"relative"` or `"metric"`. The type of depth map
+             to use. `"relative"` depth maps are up-to-scale, while `"metric"`
+             depth maps have metric meaning (e.g. in meters). Defaults to
+             `"relative"`.
+         min_depth: An float representing the minimum depth value. This value can
+             be used to filter out invalid depth values during training. Defaults
+             to `keras.config.epsilon()`.
+         max_depth: An optional float representing the maximum depth value. This
+             value can be used to filter out invalid depth values during
+             training. When `depth_estimation_type="metric"`, the model's output
+             will be scaled to the range `[0, max_depth]`.
+
+     Examples:
+
+     Call `predict()` to run inference.
+     ```python
+     # Load preset and train
+     images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+     depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+         "depth_anything_v2_small"
+     )
+     depth_estimator.predict(images)
+     ```
+
+     Call `fit()` on a single batch.
+     ```python
+     # Load preset and train
+     images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+     depths = np.random.uniform(0, 10, size=(2, 224, 224))
+     depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+         "depth_anything_v2_small",
+         depth_estimation_type="metric",
+         max_depth=10.0,
+     )
+     depth_estimator.fit(x=images, y=depths, batch_size=2)
+     ```
+
+     Call `fit()` with custom loss, optimizer and backbone.
+     ```python
+     depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+         "depth_anything_v2_small",
+         depth_estimation_type="metric",
+         max_depth=10.0,
+     )
+     depth_estimator.compile(
+         loss=keras.losses.MeanSquaredError(),
+         optimizer=keras.optimizers.Adam(5e-5),
+     )
+     depth_estimator.backbone.trainable = False
+     depth_estimator.fit(x=images, y=depths, batch_size=2)
+     ```
+
+     Custom backbone.
+     ```python
+     images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+     depths = np.random.uniform(0, 10, size=(2, 224, 224))
+     image_encoder = keras_hub.models.DINOV2Backbone.from_preset("dinov2_small")
+     backbone = keras_hub.models.DepthAnythingBackbone(
+         image_encoder=image_encoder,
+         patch_size=image_encoder.patch_size,
+         backbone_hidden_dim=image_encoder.hidden_dim,
+         reassemble_factors=[4, 2, 1, 0.5],
+         neck_hidden_dims=[48, 96, 192, 384],
+         fusion_hidden_dim=64,
+         head_hidden_dim=32,
+         head_in_index=-1,
+     )
+     depth_estimator = keras_hub.models.DepthEstimator(
+         backbone=backbone,
+         depth_estimation_type="metric",
+         max_depth=10.0,
+     )
+     depth_estimator.fit(x=images, y=depths, batch_size=2)
+     ```
+     """
+
+     def __init__(
+         self,
+         backbone,
+         depth_estimation_type,
+         min_depth=keras.config.epsilon(),
+         max_depth=None,
+         preprocessor=None,
+         **kwargs,
+     ):
+         # === Layers ===
+         self.backbone = backbone
+         self.preprocessor = preprocessor
+         if depth_estimation_type == "relative":
+             self.output_activation = keras.layers.ReLU(
+                 dtype=backbone.dtype_policy,
+                 name="output_activation",
+             )
+         elif depth_estimation_type == "metric":
+             self.output_activation = keras.layers.Activation(
+                 activation="sigmoid",
+                 dtype=backbone.dtype_policy,
+                 name="output_activation",
+             )
+         else:
+             raise ValueError(
+                 "`depth_estimation_type` should be either `'relative'` or "
+                 "`'metric'`. "
+                 f"Received: depth_estimation_type={depth_estimation_type}."
+             )
+         if max_depth is not None and depth_estimation_type != "metric":
+             raise ValueError(
+                 "`max_depth` should only be set when "
+                 "`depth_estimation_type='metric'`. "
+                 f"Received: depth_estimation_type={depth_estimation_type}, "
+                 f"max_depth={max_depth}."
+             )
+         self.multiplier = Multiplier(
+             multiplier=max_depth, dtype=backbone.dtype_policy, name="multiplier"
+         )
+         self.depths = keras.layers.Identity(
+             dtype=backbone.dtype_policy, name="depths"
+         )
+
+         # === Config ===
+         self.depth_estimation_type = depth_estimation_type
+         self.min_depth = float(min_depth) if min_depth is not None else None
+         self.max_depth = float(max_depth) if max_depth is not None else None
+
+         # === Functional Model ===
+         inputs = self.backbone.input
+         depths = self.backbone(inputs)
+         depths = self.output_activation(depths)
+         depths = self.multiplier(depths)
+         depths = self.depths(depths)
+         outputs = {"depths": depths}
+         super().__init__(
+             inputs=inputs,
+             outputs=outputs,
+             **kwargs,
+         )
+
+     def get_config(self):
+         # Backbone serialized in `super`
+         config = super().get_config()
+         config.update(
+             {
+                 "depth_estimation_type": self.depth_estimation_type,
+                 "min_depth": self.min_depth,
+                 "max_depth": self.max_depth,
+             }
+         )
+         return config
+
+     def compile(
+         self,
+         optimizer="auto",
+         loss="auto",
+         *,
+         metrics="auto",
+         **kwargs,
+     ):
+         """Configures the `DepthEstimator` task for training.
+
+         The `DepthEstimator` task extends the default compilation signature of
+         `keras.Model.compile` with defaults for `optimizer`, `loss`, and
+         `metrics`. To override these defaults, pass any value
+         to these arguments during compilation.
+
+         Args:
+             optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer`
+                 instance. Defaults to `"auto"`, which uses the default optimizer
+                 for the given model and task. See `keras.Model.compile` and
+                 `keras.optimizers` for more info on possible `optimizer` values.
+             loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance.
+                 Defaults to `"auto"`, where a `keras.losses.MeanSquaredError`
+                 loss will be applied for the depth estimation task. See
+                 `keras.Model.compile` and `keras.losses` for more info on
+                 possible `loss` values.
+             metrics: `"auto"`, or a dict of metrics to be evaluated by
+                 the model during training and testing. Defaults to `"auto"`,
+                 where a `keras.metrics.RootMeanSquaredError` will be applied to
+                 track the accuracy of the model during training. See
+                 `keras.Model.compile` and `keras.metrics` for more info on
+                 possible `metrics` values.
+             **kwargs: See `keras.Model.compile` for a full list of arguments
+                 supported by the compile method.
+         """
+         if optimizer == "auto":
+             optimizer = keras.optimizers.AdamW(5e-5)
+         if loss == "auto":
+             loss = {"depths": keras.losses.MeanSquaredError()}
+         if metrics == "auto":
+             metrics = {"depths": keras.metrics.RootMeanSquaredError()}
+         super().compile(
+             optimizer=optimizer,
+             loss=loss,
+             metrics=metrics,
+             **kwargs,
+         )
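
The output head is deliberately small: `"relative"` estimation passes the backbone output through a ReLU, while `"metric"` estimation squashes it with a sigmoid and rescales by `max_depth` via the `Multiplier` layer, so predictions land in `[0, max_depth]`. A quick numeric sketch of that difference using plain Keras ops (values are illustrative):

```python
import numpy as np
from keras import ops

max_depth = 10.0
raw = ops.convert_to_tensor(np.array([[-2.0, 0.0, 3.0]], dtype="float32"))

# "relative" head: ReLU keeps depths non-negative but unscaled.
relative = ops.relu(raw)

# "metric" head: sigmoid then multiply by max_depth, mirroring the
# `output_activation` + `Multiplier` pair above.
metric = ops.multiply(ops.sigmoid(raw), max_depth)

print(ops.convert_to_numpy(relative))  # [[0. 0. 3.]]
print(ops.convert_to_numpy(metric))    # ~[[1.19 5.   9.53]]
```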
keras_hub/src/models/depth_estimator_preprocessor.py
@@ -0,0 +1,78 @@
+ import keras
+
+ from keras_hub.src.api_export import keras_hub_export
+ from keras_hub.src.models.preprocessor import Preprocessor
+ from keras_hub.src.utils.tensor_utils import preprocessing_function
+
+
+ @keras_hub_export("keras_hub.models.DepthEstimatorPreprocessor")
+ class DepthEstimatorPreprocessor(Preprocessor):
+     """Base class for depth estimation preprocessing layers.
+
+     `DepthEstimatorPreprocessor` tasks wraps a
+     `keras_hub.layers.ImageConverter` to create a preprocessing layer for
+     depth estimation tasks. It is intended to be paired with a
+     `keras_hub.models.DepthEstimator` task.
+
+     All `DepthEstimatorPreprocessor` take inputs three inputs, `x`, `y`, and
+     `sample_weight`. `x`, the first input, should always be included. It can
+     be a image or batch of images. See examples below. `y` and `sample_weight`
+     are optional inputs that will be passed through unaltered. Usually, `y` will
+     be the depths, and `sample_weight` will not be provided.
+
+     The layer will output either `x`, an `(x, y)` tuple if depths were provided,
+     or an `(x, y, sample_weight)` tuple if depths and sample weight were
+     provided. `x` will be the input images after all model preprocessing has
+     been applied.
+
+     All `DepthEstimatorPreprocessor` tasks include a `from_preset()`
+     constructor which can be used to load a pre-trained config.
+     You can call the `from_preset()` constructor directly on this base class, in
+     which case the correct class for your model will be automatically
+     instantiated.
+
+     Examples.
+     ```python
+     preprocessor = keras_hub.models.DepthEstimatorPreprocessor.from_preset(
+         "depth_anything_v2_small",
+     )
+
+     # Resize a single image for DepthAnythingV2 Small.
+     x = np.random.randint(0, 256, (512, 512, 3))
+     x = preprocessor(x)
+
+     # Resize a labeled image.
+     x = np.random.randint(0, 256, (512, 512, 3))
+     y = np.random.uniform(0, 10, size=(512, 512))
+     x, y = preprocessor(x, y)
+
+     # Resize a batch of labeled images.
+     x = [
+         np.random.randint(0, 256, (512, 512, 3)),
+         np.zeros((512, 512, 3)),
+     ]
+     y = [
+         np.random.uniform(0, 10, size=(512, 512)),
+         np.random.uniform(0, 10, size=(512, 512)),
+     ]
+     x, y = preprocessor(x, y)
+
+     # Use a `tf.data.Dataset`.
+     ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
+     ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+     ```
+     """
+
+     def __init__(
+         self,
+         image_converter=None,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.image_converter = image_converter
+
+     @preprocessing_function
+     def call(self, x, y=None, sample_weight=None):
+         if self.image_converter:
+             x = self.image_converter(x)
+         return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
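
Taken together with the task above, `from_preset` would attach the matching image converter so raw images can be fed straight to `fit()`, and the SiLog loss from this release can replace the default mean squared error at compile time. A sketch under the assumption that a DepthAnything preset with weights is available (the preset name mirrors the docstring examples; `backbone_presets` is still empty in this nightly):

```python
import numpy as np
import keras_hub

from keras_hub.src.models.depth_anything.depth_anything_loss import (
    DepthAnythingLoss,
)

images = np.random.randint(0, 256, size=(2, 224, 224, 3))
depths = np.random.uniform(0, 10, size=(2, 224, 224))

# Hypothetical preset name, taken from the docstring examples above.
depth_estimator = keras_hub.models.DepthEstimator.from_preset(
    "depth_anything_v2_small",
    depth_estimation_type="metric",
    max_depth=10.0,
)
# Swap the default MeanSquaredError for the scale-invariant log loss.
depth_estimator.compile(loss={"depths": DepthAnythingLoss(max_depth=10.0)})
depth_estimator.fit(x=images, y=depths, batch_size=2)
```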
keras_hub/src/models/dinov2/dinov2_backbone.py
@@ -1,14 +1,14 @@
  from keras import layers

  from keras_hub.src.api_export import keras_hub_export
- from keras_hub.src.models.backbone import Backbone
  from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Embedding
  from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Encoder
+ from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
  from keras_hub.src.utils.keras_utils import standardize_data_format


  @keras_hub_export("keras_hub.models.DINOV2Backbone")
- class DINOV2Backbone(Backbone):
+ class DINOV2Backbone(FeaturePyramidBackbone):
      """DINOV2 core network with hyperparameters.

      DINOV2 offers a powerful, generalist visual backbone learned entirely from
@@ -19,6 +19,10 @@ class DINOV2Backbone(Backbone):
      DINOV2 model with any number of layers, heads, and embedding dimensions. To
      load preset architectures and weights, use the `from_preset` constructor.

+     Note that this backbone is a Feature Pyramid Backbone that can output
+     intermediate feature maps from different stages of the model. See the
+     example below for how to access these feature pyramid outputs.
+
      Note that this backbone supports interpolation of the position embeddings
      to the input image shape. This is useful when the input image shape is
      different from the shape used to train the position embeddings. The
@@ -50,6 +54,8 @@ class DINOV2Backbone(Backbone):
              embeddings to the actual input shape. Defaults to `(518, 518)`.
          antialias_in_interpolation: bool. Whether to use antialiasing in the
              interpolation of the position embeddings. Defaults to `False`.
+         apply_layernorm: bool. Whether to apply layer normalization to the
+             outputs of each stage in the feature pyramid. Defaults to `False`.
          data_format: `None` or str. If specified, either `"channels_last"` or
              `"channels_first"`. The ordering of the dimensions in the
              inputs. `"channels_last"` corresponds to inputs with shape
@@ -95,6 +101,16 @@ class DINOV2Backbone(Backbone):
          position_embedding_shape=(518, 518),
      )
      model(input_data)
+
+     # Accessing feature pyramid outputs.
+     backbone = keras_hub.models.DINOV2Backbone.from_preset(
+         "dinov2_base", image_shape=(224, 224, 3)
+     )
+     model = keras.Model(
+         inputs=backbone.inputs,
+         outputs=backbone.pyramid_outputs,
+     )
+     features = model(input_data)
      ```
      """

@@ -114,6 +130,7 @@ class DINOV2Backbone(Backbone):
          image_shape=(224, 224, 3),
          position_embedding_shape=(518, 518, 3),
          antialias_in_interpolation=False,
+         apply_layernorm=False,
          data_format=None,
          dtype=None,
          name=None,
@@ -176,10 +193,16 @@ class DINOV2Backbone(Backbone):
          )

          # === Functional Model ===
+         pyramid_outputs = {}
          image_input = layers.Input(shape=image_shape, name="images")
          x = self.embeddings(image_input)
-         x = self.encoder(x)
+         pyramid_outputs["stem"] = x
+         x, encoder_pyramid_outputs = self.encoder(x)
+         pyramid_outputs.update(encoder_pyramid_outputs)
          x = self.layernorm(x)
+         if apply_layernorm:
+             for key in pyramid_outputs:
+                 pyramid_outputs[key] = self.layernorm(pyramid_outputs[key])
          outputs = x
          super().__init__(
              inputs={"images": image_input},
@@ -204,6 +227,8 @@ class DINOV2Backbone(Backbone):
          self.image_shape = image_shape
          self.position_embedding_shape = position_embedding_shape
          self.antialias_in_interpolation = bool(antialias_in_interpolation)
+         self.apply_layernorm = apply_layernorm
+         self.pyramid_outputs = pyramid_outputs
 
      def get_config(self):
          config = super().get_config()
@@ -223,6 +248,7 @@ class DINOV2Backbone(Backbone):
                  "image_shape": self.image_shape,
                  "position_embedding_shape": self.position_embedding_shape,
                  "antialias_in_interpolation": self.antialias_in_interpolation,
+                 "apply_layernorm": self.apply_layernorm,
              }
          )
          return config
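
With the switch to `FeaturePyramidBackbone`, `pyramid_outputs` maps `"stem"` (the patch embeddings) and `"stage1"` through `"stageN"` (each transformer layer's hidden states) to token sequences of shape `(batch, 1 + num_register_tokens + num_patches, hidden_dim)`. A small sketch of reading those endpoints, assuming a DINOV2 preset is available (the preset name is illustrative):

```python
import numpy as np
import keras
import keras_hub

backbone = keras_hub.models.DINOV2Backbone.from_preset(
    "dinov2_small", image_shape=(224, 224, 3)
)
feature_extractor = keras.Model(
    inputs=backbone.inputs, outputs=backbone.pyramid_outputs
)
features = feature_extractor(np.random.uniform(size=(1, 224, 224, 3)))
for name, tokens in features.items():
    # Keys: "stem", "stage1", ..., "stageN"; each value is a token sequence.
    print(name, tokens.shape)
```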
keras_hub/src/models/dinov2/dinov2_layers.py
@@ -1,3 +1,4 @@
+ import keras
  from keras import backend
  from keras import config
  from keras import initializers
@@ -290,6 +291,10 @@ class DINOV2Embedding(layers.Layer):
          output_shape[1] = 1 + self.num_register_tokens + patch_num**2
          return output_shape

+     def compute_output_spec(self, inputs):
+         output_shape = self.compute_output_shape(inputs.shape)
+         return keras.KerasTensor(output_shape, dtype=self.compute_dtype)
+
      @staticmethod
      def _interpolate_position_embeddings(
          position_embeddings,
@@ -861,10 +866,12 @@ class DINOV2Encoder(layers.Layer):
              input_shape = layer.compute_output_shape(input_shape)

      def call(self, inputs, training=None):
+         pyramid_outputs = {}
          x = inputs
-         for layer in self.layers:
+         for layer_index, layer in enumerate(self.layers, start=1):
              x = layer(x, training=training)
-         return x
+             pyramid_outputs[f"stage{str(layer_index)}"] = x
+         return x, pyramid_outputs

      def get_config(self):
          config = super().get_config()
@@ -883,4 +890,7 @@ class DINOV2Encoder(layers.Layer):
          return config

      def compute_output_shape(self, input_shape):
-         return input_shape
+         pyramid_outputs = {}
+         for layer_index in range(1, len(self.layers) + 1):
+             pyramid_outputs[f"stage{str(layer_index)}"] = input_shape
+         return input_shape, pyramid_outputs
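
Because `DINOV2Encoder.call` now returns a `(final_output, pyramid_outputs)` tuple keyed `"stage1"` through `"stageN"`, downstream callers (like the backbone change above) have to unpack two values. A schematic stand-in for that contract, using plain Keras layers rather than the real encoder class:

```python
import numpy as np
from keras import layers, ops

# Stand-in "encoder": a stack of layers whose per-stage outputs are recorded
# under "stage{i}", mirroring the new DINOV2Encoder.call return signature.
blocks = [layers.Dense(8, activation="gelu") for _ in range(3)]


def encode(tokens):
    pyramid_outputs = {}
    x = tokens
    for layer_index, block in enumerate(blocks, start=1):
        x = block(x)
        pyramid_outputs[f"stage{layer_index}"] = x
    return x, pyramid_outputs


tokens = ops.convert_to_tensor(np.random.rand(1, 5, 8).astype("float32"))
x, stages = encode(tokens)
print(x.shape, sorted(stages))  # (1, 5, 8) ['stage1', 'stage2', 'stage3']
```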