keras-hub-nightly 0.23.0.dev202509190415__py3-none-any.whl → 0.23.0.dev202509290422__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/layers/__init__.py +3 -0
- keras_hub/models/__init__.py +24 -0
- keras_hub/src/models/depth_anything/__init__.py +9 -0
- keras_hub/src/models/depth_anything/depth_anything_backbone.py +232 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator.py +70 -0
- keras_hub/src/models/depth_anything/depth_anything_depth_estimator_preprocessor.py +16 -0
- keras_hub/src/models/depth_anything/depth_anything_image_converter.py +10 -0
- keras_hub/src/models/depth_anything/depth_anything_layers.py +725 -0
- keras_hub/src/models/depth_anything/depth_anything_loss.py +89 -0
- keras_hub/src/models/depth_anything/depth_anything_presets.py +4 -0
- keras_hub/src/models/depth_anything/interpolate.py +62 -0
- keras_hub/src/models/depth_estimator.py +239 -0
- keras_hub/src/models/depth_estimator_preprocessor.py +78 -0
- keras_hub/src/models/dinov2/dinov2_backbone.py +29 -3
- keras_hub/src/models/dinov2/dinov2_layers.py +13 -3
- keras_hub/src/models/qwen3_moe/qwen3_moe_attention.py +371 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_backbone.py +365 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm.py +357 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_preprocessor.py +12 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_decoder.py +672 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_layernorm.py +45 -0
- keras_hub/src/models/qwen3_moe/qwen3_moe_tokenizer.py +48 -0
- keras_hub/src/tests/test_case.py +3 -2
- keras_hub/src/utils/transformers/convert_dinov2.py +1 -0
- keras_hub/src/utils/transformers/convert_qwen3_moe.py +216 -0
- keras_hub/src/utils/transformers/preset_loader.py +3 -0
- keras_hub/src/version.py +1 -1
- keras_hub/tokenizers/__init__.py +3 -0
- {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/METADATA +1 -1
- {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/RECORD +32 -13
- {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/WHEEL +0 -0
- {keras_hub_nightly-0.23.0.dev202509190415.dist-info → keras_hub_nightly-0.23.0.dev202509290422.dist-info}/top_level.txt +0 -0
keras_hub/src/models/depth_anything/depth_anything_loss.py
@@ -0,0 +1,89 @@
+import keras
+from keras import ops
+from keras.src.losses.losses import LossFunctionWrapper
+
+
+class DepthAnythingLoss(LossFunctionWrapper):
+    """Computes the DepthAnything loss between `y_true` & `y_pred`.
+
+    This loss is the Scale-Invariant Logarithmic (SiLog) loss, which is
+    widely used for depth estimation tasks.
+
+    See: [Depth Map Prediction from a Single Image using a Multi-Scale Deep Network](https://arxiv.org/abs/1406.2283)
+
+    Args:
+        lambd: The weighting factor in the scale-invariant log loss formula.
+            Defaults to `0.5`.
+        min_depth: Minimum depth value used to filter `y_pred` and `y_true`.
+            Defaults to `keras.config.epsilon()`.
+        max_depth: Optional maximum depth value used to filter `y_pred` and
+            `y_true`. If not specified, there will be no upper bound.
+        reduction: Type of reduction to apply to the loss. In almost all cases
+            this should be `"sum_over_batch_size"`. Supported options are
+            `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+            `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss,
+            `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+            sample size, and `"mean_with_sample_weight"` sums the loss and
+            divides by the sum of the sample weights. `"none"` and `None`
+            perform no aggregation. Defaults to `"sum_over_batch_size"`.
+        name: Optional name for the instance.
+        dtype: The dtype of the loss's computations. Defaults to `None`, which
+            means using `keras.backend.floatx()`. `keras.backend.floatx()` is
+            `"float32"` unless set to a different value
+            (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
+            provided, then the `compute_dtype` will be utilized.
+    """
+
+    def __init__(
+        self,
+        lambd=0.5,
+        min_depth=keras.config.epsilon(),
+        max_depth=None,
+        reduction="sum_over_batch_size",
+        name="depth_anything_loss",
+        dtype=None,
+    ):
+        super().__init__(
+            silog,
+            name=name,
+            reduction=reduction,
+            dtype=dtype,
+            lambd=lambd,
+            min_depth=min_depth,
+            max_depth=max_depth,
+        )
+
+
+def silog(
+    y_true, y_pred, lambd=0.5, min_depth=keras.config.epsilon(), max_depth=None
+):
+    y_pred = ops.convert_to_tensor(y_pred)
+    y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype)
+
+    # Apply the valid mask.
+    if max_depth is None:
+        valid_mask = ops.greater_equal(y_true, min_depth)
+    else:
+        valid_mask = ops.logical_and(
+            ops.greater_equal(y_true, min_depth),
+            ops.less_equal(y_true, max_depth),
+        )
+    y_true = ops.multiply(y_true, valid_mask)
+    y_pred = ops.multiply(y_pred, valid_mask)
+
+    diff_log = ops.where(
+        valid_mask,
+        ops.subtract(ops.log(y_true), ops.log(y_pred)),
+        ops.zeros_like(y_true),
+    )
+
+    divisor = ops.sum(ops.cast(valid_mask, y_true.dtype), axis=(1, 2, 3))
+    mean_power2_diff_log = ops.divide_no_nan(
+        ops.sum(ops.power(diff_log, 2), axis=(1, 2, 3)), divisor
+    )
+    power2_mean_diff_log = ops.power(
+        ops.divide_no_nan(ops.sum(diff_log, axis=(1, 2, 3)), divisor), 2
+    )
+    return ops.sqrt(
+        mean_power2_diff_log - ops.multiply(lambd, power2_mean_diff_log)
+    )
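For reference, the `silog` helper above implements the scale-invariant log error from the paper linked in the docstring: with per-pixel log differences d = log(y_true) - log(y_pred) over the valid mask, it returns sqrt(mean(d^2) - lambd * mean(d)^2) per sample, reducing over axes `(1, 2, 3)`. A minimal standalone sketch, assuming this nightly wheel is installed and 4D `(batch, height, width, 1)` depth maps (which those reduction axes imply):

import numpy as np
from keras_hub.src.models.depth_anything.depth_anything_loss import silog

# Illustrative depth maps in the range [0.1, 10].
y_true = np.random.uniform(0.1, 10.0, size=(2, 8, 8, 1)).astype("float32")
y_pred = np.random.uniform(0.1, 10.0, size=(2, 8, 8, 1)).astype("float32")

# One SiLog value per sample; pixels outside [min_depth, max_depth] are masked out.
per_sample = silog(y_true, y_pred, lambd=0.5, min_depth=1e-7, max_depth=10.0)
print(per_sample.shape)  # (2,)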
keras_hub/src/models/depth_anything/interpolate.py
@@ -0,0 +1,62 @@
+from keras import backend
+from keras import ops
+
+from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+def interpolate(x, size, data_format=None):
+    """Performs a backend-agnostic version of Torch's `F.interpolate`.
+
+    Args:
+        x: A 4D image tensor.
+        size: A tuple of 2 integers, `(height, width)`.
+        data_format: One of `channels_last` or `channels_first`.
+    """
+    data_format = standardize_data_format(data_format)
+    if backend.backend() == "jax":
+        import jax
+
+        if data_format == "channels_first":
+            x = ops.transpose(x, (0, 2, 3, 1))
+        scale = ops.convert_to_tensor(
+            [
+                (size[0] - 1.0) / (x.shape[1] - 1.0),
+                (size[1] - 1.0) / (x.shape[2] - 1.0),
+            ]
+        )
+        translation = -(scale / 2.0 - 0.5)
+        x = jax.image.scale_and_translate(
+            x,
+            (x.shape[0], *size, x.shape[-1]),
+            method="bilinear",
+            scale=scale,
+            spatial_dims=(1, 2),
+            translation=translation,
+            antialias=False,
+        )
+        if data_format == "channels_first":
+            x = ops.transpose(x, (0, 3, 1, 2))
+    elif backend.backend() == "tensorflow":
+        import tensorflow as tf
+
+        if data_format == "channels_first":
+            x = ops.transpose(x, (0, 2, 3, 1))
+        x = tf.compat.v1.image.resize(
+            x,
+            size=size,
+            method="bilinear",
+            align_corners=True,
+        )
+        if data_format == "channels_first":
+            x = ops.transpose(x, (0, 3, 1, 2))
+    elif backend.backend() == "torch":
+        import torch.nn.functional as F
+
+        if data_format == "channels_last":
+            x = ops.transpose(x, (0, 3, 1, 2))
+        x = F.interpolate(x, size=size, mode="bilinear", align_corners=True)
+        if data_format == "channels_last":
+            x = ops.transpose(x, (0, 2, 3, 1))
+    else:
+        raise NotImplementedError(f"Unsupported backend: {backend.backend()}")
+    return x
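A usage sketch for the helper above (a hedged example, assuming this nightly wheel, the default `channels_last` image data format, and any one of the JAX, TensorFlow, or PyTorch backends; values are illustrative):

import numpy as np
from keras_hub.src.models.depth_anything.interpolate import interpolate

x = np.random.uniform(size=(1, 16, 16, 3)).astype("float32")  # channels_last
y = interpolate(x, size=(32, 32))  # bilinear upsampling
print(y.shape)  # (1, 32, 32, 3)

The JAX branch reproduces Torch's `align_corners=True` behavior by scaling with `(out - 1) / (in - 1)` and translating by `-(scale / 2 - 0.5)`, which keeps the corner pixels of the input and output grids aligned.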
keras_hub/src/models/depth_estimator.py
@@ -0,0 +1,239 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.task import Task
+
+
+class Multiplier(keras.layers.Layer):
+    def __init__(self, multiplier=None, **kwargs):
+        super().__init__(**kwargs)
+        self.multiplier = float(multiplier) if multiplier is not None else None
+
+    def call(self, inputs):
+        if self.multiplier is not None:
+            inputs = keras.ops.multiply(inputs, self.multiplier)
+        return inputs
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "multiplier": self.multiplier,
+            }
+        )
+        return config
+
+
+@keras_hub_export("keras_hub.models.DepthEstimator")
+class DepthEstimator(Task):
+    """Base class for all depth estimation tasks.
+
+    `DepthEstimator` tasks wrap a `keras_hub.models.Backbone` and
+    a `keras_hub.models.Preprocessor` to create a model that can be used for
+    depth estimation.
+
+    To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
+    labels where `x` is an RGB image and `y` is a depth map. All
+    `DepthEstimator` tasks include a `from_preset()` constructor which can be
+    used to load a pre-trained config and weights.
+
+    Args:
+        backbone: A `keras_hub.models.Backbone` instance or a `keras.Model`.
+        preprocessor: `None`, a `keras_hub.models.Preprocessor` instance,
+            a `keras.Layer` instance, or a callable. If `None` no preprocessing
+            will be applied to the inputs.
+        depth_estimation_type: `"relative"` or `"metric"`. The type of depth
+            map to use. `"relative"` depth maps are up-to-scale, while
+            `"metric"` depth maps have metric meaning (e.g. in meters).
+            Defaults to `"relative"`.
+        min_depth: A float representing the minimum depth value. This value
+            can be used to filter out invalid depth values during training.
+            Defaults to `keras.config.epsilon()`.
+        max_depth: An optional float representing the maximum depth value. This
+            value can be used to filter out invalid depth values during
+            training. When `depth_estimation_type="metric"`, the model's output
+            will be scaled to the range `[0, max_depth]`.
+
+    Examples:
+
+    Call `predict()` to run inference.
+    ```python
+    # Load preset and train
+    images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+    depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+        "depth_anything_v2_small"
+    )
+    depth_estimator.predict(images)
+    ```
+
+    Call `fit()` on a single batch.
+    ```python
+    # Load preset and train
+    images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+    depths = np.random.uniform(0, 10, size=(2, 224, 224))
+    depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+        "depth_anything_v2_small",
+        depth_estimation_type="metric",
+        max_depth=10.0,
+    )
+    depth_estimator.fit(x=images, y=depths, batch_size=2)
+    ```
+
+    Call `fit()` with a custom loss, optimizer and backbone.
+    ```python
+    depth_estimator = keras_hub.models.DepthEstimator.from_preset(
+        "depth_anything_v2_small",
+        depth_estimation_type="metric",
+        max_depth=10.0,
+    )
+    depth_estimator.compile(
+        loss=keras.losses.MeanSquaredError(),
+        optimizer=keras.optimizers.Adam(5e-5),
+    )
+    depth_estimator.backbone.trainable = False
+    depth_estimator.fit(x=images, y=depths, batch_size=2)
+    ```
+
+    Custom backbone.
+    ```python
+    images = np.random.randint(0, 256, size=(2, 224, 224, 3))
+    depths = np.random.uniform(0, 10, size=(2, 224, 224))
+    image_encoder = keras_hub.models.DINOV2Backbone.from_preset("dinov2_small")
+    backbone = keras_hub.models.DepthAnythingBackbone(
+        image_encoder=image_encoder,
+        patch_size=image_encoder.patch_size,
+        backbone_hidden_dim=image_encoder.hidden_dim,
+        reassemble_factors=[4, 2, 1, 0.5],
+        neck_hidden_dims=[48, 96, 192, 384],
+        fusion_hidden_dim=64,
+        head_hidden_dim=32,
+        head_in_index=-1,
+    )
+    depth_estimator = keras_hub.models.DepthEstimator(
+        backbone=backbone,
+        depth_estimation_type="metric",
+        max_depth=10.0,
+    )
+    depth_estimator.fit(x=images, y=depths, batch_size=2)
+    ```
+    """
+
+    def __init__(
+        self,
+        backbone,
+        depth_estimation_type,
+        min_depth=keras.config.epsilon(),
+        max_depth=None,
+        preprocessor=None,
+        **kwargs,
+    ):
+        # === Layers ===
+        self.backbone = backbone
+        self.preprocessor = preprocessor
+        if depth_estimation_type == "relative":
+            self.output_activation = keras.layers.ReLU(
+                dtype=backbone.dtype_policy,
+                name="output_activation",
+            )
+        elif depth_estimation_type == "metric":
+            self.output_activation = keras.layers.Activation(
+                activation="sigmoid",
+                dtype=backbone.dtype_policy,
+                name="output_activation",
+            )
+        else:
+            raise ValueError(
+                "`depth_estimation_type` should be either `'relative'` or "
+                "`'metric'`. "
+                f"Received: depth_estimation_type={depth_estimation_type}."
+            )
+        if max_depth is not None and depth_estimation_type != "metric":
+            raise ValueError(
+                "`max_depth` should only be set when "
+                "`depth_estimation_type='metric'`. "
+                f"Received: depth_estimation_type={depth_estimation_type}, "
+                f"max_depth={max_depth}."
+            )
+        self.multiplier = Multiplier(
+            multiplier=max_depth, dtype=backbone.dtype_policy, name="multiplier"
+        )
+        self.depths = keras.layers.Identity(
+            dtype=backbone.dtype_policy, name="depths"
+        )
+
+        # === Config ===
+        self.depth_estimation_type = depth_estimation_type
+        self.min_depth = float(min_depth) if min_depth is not None else None
+        self.max_depth = float(max_depth) if max_depth is not None else None
+
+        # === Functional Model ===
+        inputs = self.backbone.input
+        depths = self.backbone(inputs)
+        depths = self.output_activation(depths)
+        depths = self.multiplier(depths)
+        depths = self.depths(depths)
+        outputs = {"depths": depths}
+        super().__init__(
+            inputs=inputs,
+            outputs=outputs,
+            **kwargs,
+        )
+
+    def get_config(self):
+        # Backbone serialized in `super`
+        config = super().get_config()
+        config.update(
+            {
+                "depth_estimation_type": self.depth_estimation_type,
+                "min_depth": self.min_depth,
+                "max_depth": self.max_depth,
+            }
+        )
+        return config
+
+    def compile(
+        self,
+        optimizer="auto",
+        loss="auto",
+        *,
+        metrics="auto",
+        **kwargs,
+    ):
+        """Configures the `DepthEstimator` task for training.
+
+        The `DepthEstimator` task extends the default compilation signature of
+        `keras.Model.compile` with defaults for `optimizer`, `loss`, and
+        `metrics`. To override these defaults, pass any value
+        to these arguments during compilation.
+
+        Args:
+            optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer`
+                instance. Defaults to `"auto"`, which uses the default optimizer
+                for the given model and task. See `keras.Model.compile` and
+                `keras.optimizers` for more info on possible `optimizer` values.
+            loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance.
+                Defaults to `"auto"`, where a `keras.losses.MeanSquaredError`
+                loss will be applied for the depth estimation task. See
+                `keras.Model.compile` and `keras.losses` for more info on
+                possible `loss` values.
+            metrics: `"auto"`, or a dict of metrics to be evaluated by
+                the model during training and testing. Defaults to `"auto"`,
+                where a `keras.metrics.RootMeanSquaredError` will be applied to
+                track the accuracy of the model during training. See
+                `keras.Model.compile` and `keras.metrics` for more info on
+                possible `metrics` values.
+            **kwargs: See `keras.Model.compile` for a full list of arguments
+                supported by the compile method.
+        """
+        if optimizer == "auto":
+            optimizer = keras.optimizers.AdamW(5e-5)
+        if loss == "auto":
+            loss = {"depths": keras.losses.MeanSquaredError()}
+        if metrics == "auto":
+            metrics = {"depths": keras.metrics.RootMeanSquaredError()}
+        super().compile(
+            optimizer=optimizer,
+            loss=loss,
+            metrics=metrics,
+            **kwargs,
+        )
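Since `compile()` above defaults to a mean squared error on the `"depths"` output, swapping in the SiLog loss from `depth_anything_loss.py` is a small change. A hedged sketch (the preset name is reused from the docstring examples above and not re-verified against the shipped presets):

import keras
import keras_hub
from keras_hub.src.models.depth_anything.depth_anything_loss import (
    DepthAnythingLoss,
)

depth_estimator = keras_hub.models.DepthEstimator.from_preset(
    "depth_anything_v2_small",
    depth_estimation_type="metric",
    max_depth=10.0,
)
# Override only the loss; the default AdamW optimizer and RMSE metric remain.
depth_estimator.compile(
    optimizer=keras.optimizers.AdamW(5e-5),
    loss={"depths": DepthAnythingLoss(max_depth=10.0)},
)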
keras_hub/src/models/depth_estimator_preprocessor.py
@@ -0,0 +1,78 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.preprocessor import Preprocessor
+from keras_hub.src.utils.tensor_utils import preprocessing_function
+
+
+@keras_hub_export("keras_hub.models.DepthEstimatorPreprocessor")
+class DepthEstimatorPreprocessor(Preprocessor):
+    """Base class for depth estimation preprocessing layers.
+
+    `DepthEstimatorPreprocessor` tasks wrap a
+    `keras_hub.layers.ImageConverter` to create a preprocessing layer for
+    depth estimation tasks. It is intended to be paired with a
+    `keras_hub.models.DepthEstimator` task.
+
+    All `DepthEstimatorPreprocessor` layers take three inputs: `x`, `y`, and
+    `sample_weight`. `x`, the first input, should always be included. It can
+    be an image or batch of images. See examples below. `y` and `sample_weight`
+    are optional inputs that will be passed through unaltered. Usually, `y` will
+    be the depths, and `sample_weight` will not be provided.
+
+    The layer will output either `x`, an `(x, y)` tuple if depths were provided,
+    or an `(x, y, sample_weight)` tuple if depths and sample weight were
+    provided. `x` will be the input images after all model preprocessing has
+    been applied.
+
+    All `DepthEstimatorPreprocessor` tasks include a `from_preset()`
+    constructor which can be used to load a pre-trained config.
+    You can call the `from_preset()` constructor directly on this base class, in
+    which case the correct class for your model will be automatically
+    instantiated.
+
+    Examples:
+    ```python
+    preprocessor = keras_hub.models.DepthEstimatorPreprocessor.from_preset(
+        "depth_anything_v2_small",
+    )
+
+    # Resize a single image for DepthAnythingV2 Small.
+    x = np.random.randint(0, 256, (512, 512, 3))
+    x = preprocessor(x)
+
+    # Resize a labeled image.
+    x = np.random.randint(0, 256, (512, 512, 3))
+    y = np.random.uniform(0, 10, size=(512, 512))
+    x, y = preprocessor(x, y)
+
+    # Resize a batch of labeled images.
+    x = [
+        np.random.randint(0, 256, (512, 512, 3)),
+        np.zeros((512, 512, 3)),
+    ]
+    y = [
+        np.random.uniform(0, 10, size=(512, 512)),
+        np.random.uniform(0, 10, size=(512, 512)),
+    ]
+    x, y = preprocessor(x, y)
+
+    # Use a `tf.data.Dataset`.
+    ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
+    def __init__(
+        self,
+        image_converter=None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.image_converter = image_converter
+
+    @preprocessing_function
+    def call(self, x, y=None, sample_weight=None):
+        if self.image_converter:
+            x = self.image_converter(x)
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
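As the docstring notes, this preprocessor is meant to be attached to a `keras_hub.models.DepthEstimator` task so that raw images are converted before reaching the backbone. A hedged sketch of that pairing (the preset name is reused from the examples above and not re-verified here):

import keras_hub

preprocessor = keras_hub.models.DepthEstimatorPreprocessor.from_preset(
    "depth_anything_v2_small"
)
backbone = keras_hub.models.Backbone.from_preset("depth_anything_v2_small")
depth_estimator = keras_hub.models.DepthEstimator(
    backbone=backbone,
    preprocessor=preprocessor,
    depth_estimation_type="relative",
)
# Note: `call()` above only routes `x` through the image converter; `y` (the
# depth map) is packed through unaltered, so targets must already match the
# resolution the task predicts at.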
keras_hub/src/models/dinov2/dinov2_backbone.py
@@ -1,14 +1,14 @@
 from keras import layers

 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.backbone import Backbone
 from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Embedding
 from keras_hub.src.models.dinov2.dinov2_layers import DINOV2Encoder
+from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
 from keras_hub.src.utils.keras_utils import standardize_data_format


 @keras_hub_export("keras_hub.models.DINOV2Backbone")
-class DINOV2Backbone(Backbone):
+class DINOV2Backbone(FeaturePyramidBackbone):
     """DINOV2 core network with hyperparameters.

     DINOV2 offers a powerful, generalist visual backbone learned entirely from
@@ -19,6 +19,10 @@ class DINOV2Backbone(Backbone):
     DINOV2 model with any number of layers, heads, and embedding dimensions. To
     load preset architectures and weights, use the `from_preset` constructor.

+    Note that this backbone is a Feature Pyramid Backbone that can output
+    intermediate feature maps from different stages of the model. See the
+    example below for how to access these feature pyramid outputs.
+
     Note that this backbone supports interpolation of the position embeddings
     to the input image shape. This is useful when the input image shape is
     different from the shape used to train the position embeddings. The
@@ -50,6 +54,8 @@ class DINOV2Backbone(Backbone):
             embeddings to the actual input shape. Defaults to `(518, 518)`.
         antialias_in_interpolation: bool. Whether to use antialiasing in the
             interpolation of the position embeddings. Defaults to `False`.
+        apply_layernorm: bool. Whether to apply layer normalization to the
+            outputs of each stage in the feature pyramid. Defaults to `False`.
         data_format: `None` or str. If specified, either `"channels_last"` or
             `"channels_first"`. The ordering of the dimensions in the
             inputs. `"channels_last"` corresponds to inputs with shape
@@ -95,6 +101,16 @@ class DINOV2Backbone(Backbone):
         position_embedding_shape=(518, 518),
     )
     model(input_data)
+
+    # Accessing feature pyramid outputs.
+    backbone = keras_hub.models.DINOV2Backbone.from_preset(
+        "dinov2_base", image_shape=(224, 224, 3)
+    )
+    model = keras.Model(
+        inputs=backbone.inputs,
+        outputs=backbone.pyramid_outputs,
+    )
+    features = model(input_data)
     ```
     """

@@ -114,6 +130,7 @@ class DINOV2Backbone(Backbone):
         image_shape=(224, 224, 3),
         position_embedding_shape=(518, 518, 3),
         antialias_in_interpolation=False,
+        apply_layernorm=False,
         data_format=None,
         dtype=None,
         name=None,
@@ -176,10 +193,16 @@ class DINOV2Backbone(Backbone):
         )

         # === Functional Model ===
+        pyramid_outputs = {}
         image_input = layers.Input(shape=image_shape, name="images")
         x = self.embeddings(image_input)
-        x = self.encoder(x)
+        pyramid_outputs["stem"] = x
+        x, encoder_pyramid_outputs = self.encoder(x)
+        pyramid_outputs.update(encoder_pyramid_outputs)
         x = self.layernorm(x)
+        if apply_layernorm:
+            for key in pyramid_outputs:
+                pyramid_outputs[key] = self.layernorm(pyramid_outputs[key])
         outputs = x
         super().__init__(
             inputs={"images": image_input},
@@ -204,6 +227,8 @@ class DINOV2Backbone(Backbone):
         self.image_shape = image_shape
         self.position_embedding_shape = position_embedding_shape
         self.antialias_in_interpolation = bool(antialias_in_interpolation)
+        self.apply_layernorm = apply_layernorm
+        self.pyramid_outputs = pyramid_outputs

     def get_config(self):
         config = super().get_config()
@@ -223,6 +248,7 @@ class DINOV2Backbone(Backbone):
                 "image_shape": self.image_shape,
                 "position_embedding_shape": self.position_embedding_shape,
                 "antialias_in_interpolation": self.antialias_in_interpolation,
+                "apply_layernorm": self.apply_layernorm,
             }
         )
         return config
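The pyramid dictionary exposed here is keyed `"stem"` for the embedding output plus `"stage1"` through `"stageN"` for the N transformer layers, as the `DINOV2Encoder` changes below show. A hedged sketch of pulling out a subset of stages (assuming the 12-layer `"dinov2_base"` preset used in the docstring example):

import keras
import keras_hub

backbone = keras_hub.models.DINOV2Backbone.from_preset(
    "dinov2_base", image_shape=(224, 224, 3)
)
# Keep every third stage, e.g. for a DPT-style neck.
selected = {
    key: backbone.pyramid_outputs[key]
    for key in ("stage3", "stage6", "stage9", "stage12")
}
feature_extractor = keras.Model(backbone.inputs, selected)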
keras_hub/src/models/dinov2/dinov2_layers.py
@@ -1,3 +1,4 @@
+import keras
 from keras import backend
 from keras import config
 from keras import initializers
@@ -290,6 +291,10 @@ class DINOV2Embedding(layers.Layer):
         output_shape[1] = 1 + self.num_register_tokens + patch_num**2
         return output_shape

+    def compute_output_spec(self, inputs):
+        output_shape = self.compute_output_shape(inputs.shape)
+        return keras.KerasTensor(output_shape, dtype=self.compute_dtype)
+
     @staticmethod
     def _interpolate_position_embeddings(
         position_embeddings,
@@ -861,10 +866,12 @@ class DINOV2Encoder(layers.Layer):
             input_shape = layer.compute_output_shape(input_shape)

     def call(self, inputs, training=None):
+        pyramid_outputs = {}
         x = inputs
-        for layer in self.layers:
+        for layer_index, layer in enumerate(self.layers, start=1):
             x = layer(x, training=training)
-        return x
+            pyramid_outputs[f"stage{str(layer_index)}"] = x
+        return x, pyramid_outputs


     def get_config(self):
@@ -883,4 +890,7 @@ class DINOV2Encoder(layers.Layer):
         return config

     def compute_output_shape(self, input_shape):
-        return input_shape
+        pyramid_outputs = {}
+        for layer_index in range(1, len(self.layers) + 1):
+            pyramid_outputs[f"stage{str(layer_index)}"] = input_shape
+        return input_shape, pyramid_outputs