keras-hub-nightly 0.16.1.dev202410200345__py3-none-any.whl → 0.19.0.dev202412070351__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras_hub/api/layers/__init__.py +12 -0
- keras_hub/api/models/__init__.py +32 -0
- keras_hub/src/bounding_box/__init__.py +2 -0
- keras_hub/src/bounding_box/converters.py +102 -12
- keras_hub/src/layers/modeling/rms_normalization.py +34 -0
- keras_hub/src/layers/modeling/transformer_encoder.py +27 -7
- keras_hub/src/layers/preprocessing/image_converter.py +5 -0
- keras_hub/src/models/albert/albert_presets.py +0 -8
- keras_hub/src/models/bart/bart_presets.py +0 -6
- keras_hub/src/models/bert/bert_presets.py +0 -20
- keras_hub/src/models/bloom/bloom_presets.py +0 -16
- keras_hub/src/models/clip/__init__.py +5 -0
- keras_hub/src/models/clip/clip_backbone.py +286 -0
- keras_hub/src/models/clip/clip_encoder_block.py +19 -4
- keras_hub/src/models/clip/clip_image_converter.py +8 -0
- keras_hub/src/models/clip/clip_presets.py +93 -0
- keras_hub/src/models/clip/clip_text_encoder.py +4 -1
- keras_hub/src/models/clip/clip_tokenizer.py +18 -3
- keras_hub/src/models/clip/clip_vision_embedding.py +101 -0
- keras_hub/src/models/clip/clip_vision_encoder.py +159 -0
- keras_hub/src/models/deberta_v3/deberta_v3_presets.py +0 -10
- keras_hub/src/models/deeplab_v3/deeplab_v3_presets.py +0 -2
- keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter.py +5 -3
- keras_hub/src/models/densenet/densenet_backbone.py +1 -1
- keras_hub/src/models/densenet/densenet_presets.py +0 -6
- keras_hub/src/models/distil_bert/distil_bert_presets.py +0 -6
- keras_hub/src/models/efficientnet/__init__.py +9 -0
- keras_hub/src/models/efficientnet/cba.py +141 -0
- keras_hub/src/models/efficientnet/efficientnet_backbone.py +139 -56
- keras_hub/src/models/efficientnet/efficientnet_image_classifier.py +14 -0
- keras_hub/src/models/efficientnet/efficientnet_image_classifier_preprocessor.py +16 -0
- keras_hub/src/models/efficientnet/efficientnet_image_converter.py +10 -0
- keras_hub/src/models/efficientnet/efficientnet_presets.py +192 -0
- keras_hub/src/models/efficientnet/fusedmbconv.py +81 -36
- keras_hub/src/models/efficientnet/mbconv.py +52 -21
- keras_hub/src/models/electra/electra_presets.py +0 -12
- keras_hub/src/models/f_net/f_net_presets.py +0 -4
- keras_hub/src/models/falcon/falcon_presets.py +0 -2
- keras_hub/src/models/flux/__init__.py +5 -0
- keras_hub/src/models/flux/flux_layers.py +494 -0
- keras_hub/src/models/flux/flux_maths.py +218 -0
- keras_hub/src/models/flux/flux_model.py +231 -0
- keras_hub/src/models/flux/flux_presets.py +14 -0
- keras_hub/src/models/flux/flux_text_to_image.py +142 -0
- keras_hub/src/models/flux/flux_text_to_image_preprocessor.py +73 -0
- keras_hub/src/models/gemma/gemma_presets.py +0 -40
- keras_hub/src/models/gpt2/gpt2_presets.py +0 -9
- keras_hub/src/models/image_object_detector.py +87 -0
- keras_hub/src/models/image_object_detector_preprocessor.py +57 -0
- keras_hub/src/models/image_to_image.py +16 -10
- keras_hub/src/models/inpaint.py +20 -13
- keras_hub/src/models/llama/llama_backbone.py +1 -1
- keras_hub/src/models/llama/llama_presets.py +5 -15
- keras_hub/src/models/llama3/llama3_presets.py +0 -8
- keras_hub/src/models/mistral/mistral_presets.py +0 -6
- keras_hub/src/models/mit/mit_backbone.py +41 -27
- keras_hub/src/models/mit/mit_layers.py +9 -7
- keras_hub/src/models/mit/mit_presets.py +12 -24
- keras_hub/src/models/opt/opt_presets.py +0 -8
- keras_hub/src/models/pali_gemma/pali_gemma_backbone.py +61 -11
- keras_hub/src/models/pali_gemma/pali_gemma_decoder_block.py +21 -23
- keras_hub/src/models/pali_gemma/pali_gemma_presets.py +166 -10
- keras_hub/src/models/pali_gemma/pali_gemma_vit.py +12 -11
- keras_hub/src/models/phi3/phi3_presets.py +0 -4
- keras_hub/src/models/resnet/resnet_presets.py +10 -42
- keras_hub/src/models/retinanet/__init__.py +5 -0
- keras_hub/src/models/retinanet/anchor_generator.py +52 -53
- keras_hub/src/models/retinanet/feature_pyramid.py +99 -36
- keras_hub/src/models/retinanet/non_max_supression.py +1 -0
- keras_hub/src/models/retinanet/prediction_head.py +192 -0
- keras_hub/src/models/retinanet/retinanet_backbone.py +146 -0
- keras_hub/src/models/retinanet/retinanet_image_converter.py +53 -0
- keras_hub/src/models/retinanet/retinanet_label_encoder.py +49 -51
- keras_hub/src/models/retinanet/retinanet_object_detector.py +382 -0
- keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +14 -0
- keras_hub/src/models/retinanet/retinanet_presets.py +15 -0
- keras_hub/src/models/roberta/roberta_presets.py +0 -4
- keras_hub/src/models/sam/sam_backbone.py +0 -1
- keras_hub/src/models/sam/sam_image_segmenter.py +9 -10
- keras_hub/src/models/sam/sam_presets.py +0 -6
- keras_hub/src/models/segformer/__init__.py +8 -0
- keras_hub/src/models/segformer/segformer_backbone.py +163 -0
- keras_hub/src/models/segformer/segformer_image_converter.py +8 -0
- keras_hub/src/models/segformer/segformer_image_segmenter.py +171 -0
- keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +31 -0
- keras_hub/src/models/segformer/segformer_presets.py +124 -0
- keras_hub/src/models/stable_diffusion_3/mmdit.py +41 -0
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_backbone.py +38 -21
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_image_to_image.py +3 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_inpaint.py +3 -3
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_presets.py +28 -4
- keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image.py +1 -1
- keras_hub/src/models/t5/t5_backbone.py +5 -4
- keras_hub/src/models/t5/t5_presets.py +41 -13
- keras_hub/src/models/text_to_image.py +13 -5
- keras_hub/src/models/vgg/vgg_backbone.py +1 -1
- keras_hub/src/models/vgg/vgg_presets.py +0 -8
- keras_hub/src/models/whisper/whisper_audio_converter.py +1 -1
- keras_hub/src/models/whisper/whisper_presets.py +0 -20
- keras_hub/src/models/xlm_roberta/xlm_roberta_presets.py +0 -4
- keras_hub/src/tests/test_case.py +25 -0
- keras_hub/src/utils/preset_utils.py +17 -4
- keras_hub/src/utils/timm/convert_efficientnet.py +449 -0
- keras_hub/src/utils/timm/preset_loader.py +3 -0
- keras_hub/src/version_utils.py +1 -1
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/METADATA +15 -26
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/RECORD +109 -76
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/WHEEL +1 -1
- {keras_hub_nightly-0.16.1.dev202410200345.dist-info → keras_hub_nightly-0.19.0.dev202412070351.dist-info}/top_level.txt +0 -0
--- a/keras_hub/src/models/sam/sam_image_segmenter.py
+++ b/keras_hub/src/models/sam/sam_image_segmenter.py
@@ -51,9 +51,9 @@ class SAMImageSegmenter(ImageSegmenter):
             (batch_size, 0, image_size, image_size, 1)
         ),
     }
-
-
-
+    sam = keras_hub.models.SAMImageSegmenter.from_preset('sam_base_sa1b')
+    outputs = sam.predict(input_data)
+    masks, iou_pred = outputs["masks"], outputs["iou_pred"]
     ```

     Load segment anything image segmenter with custom backbone
@@ -65,7 +65,7 @@ class SAMImageSegmenter(ImageSegmenter):
         (batch_size, image_size, image_size, 3),
         dtype="float32",
     )
-    image_encoder = ViTDetBackbone(
+    image_encoder = keras_hub.models.ViTDetBackbone(
         hidden_size=16,
         num_layers=16,
         intermediate_dim=16 * 4,
@@ -76,7 +76,7 @@ class SAMImageSegmenter(ImageSegmenter):
         window_size=2,
         image_shape=(image_size, image_size, 3),
     )
-    prompt_encoder = SAMPromptEncoder(
+    prompt_encoder = keras_hub.layers.SAMPromptEncoder(
         hidden_size=8,
         image_embedding_size=(8, 8),
         input_image_size=(
@@ -85,7 +85,7 @@ class SAMImageSegmenter(ImageSegmenter):
         ),
         mask_in_channels=16,
     )
-    mask_decoder = SAMMaskDecoder(
+    mask_decoder = keras_hub.layers.SAMMaskDecoder(
         num_layers=2,
         hidden_size=8,
         intermediate_dim=32,
@@ -95,13 +95,12 @@ class SAMImageSegmenter(ImageSegmenter):
         iou_head_depth=3,
         iou_head_hidden_dim=8,
     )
-    backbone = SAMBackbone(
+    backbone = keras_hub.models.SAMBackbone(
         image_encoder=image_encoder,
         prompt_encoder=prompt_encoder,
         mask_decoder=mask_decoder,
-        image_shape=(image_size, image_size, 3),
     )
-    sam = SAMImageSegmenter(
+    sam = keras_hub.models.SAMImageSegmenter(
         backbone=backbone
     )
     ```
@@ -115,7 +114,7 @@ class SAMImageSegmenter(ImageSegmenter):
     labels = np.array([[1., 0.]])
     box = np.array([[[[384., 384.], [640., 640.]]]])
     input_mask = np.ones((1, 1, 256, 256, 1))
-    Prepare an input dictionary:
+    # Prepare an input dictionary:
     inputs = {
         "images": image,
         "points": points,
--- a/keras_hub/src/models/sam/sam_presets.py
+++ b/keras_hub/src/models/sam/sam_presets.py
@@ -5,9 +5,7 @@ backbone_presets = {
         "metadata": {
             "description": ("The base SAM model trained on the SA1B dataset."),
             "params": 93735728,
-            "official_name": "SAMImageSegmenter",
             "path": "sam",
-            "model_card": "https://arxiv.org/abs/2304.02643",
         },
         "kaggle_handle": "kaggle://keras/sam/keras/sam_base_sa1b/4",
     },
@@ -15,9 +13,7 @@ backbone_presets = {
         "metadata": {
             "description": ("The large SAM model trained on the SA1B dataset."),
             "params": 641090864,
-            "official_name": "SAMImageSegmenter",
             "path": "sam",
-            "model_card": "https://arxiv.org/abs/2304.02643",
         },
         "kaggle_handle": "kaggle://keras/sam/keras/sam_large_sa1b/4",
     },
@@ -25,9 +21,7 @@ backbone_presets = {
         "metadata": {
             "description": ("The huge SAM model trained on the SA1B dataset."),
             "params": 312343088,
-            "official_name": "SAMImageSegmenter",
             "path": "sam",
-            "model_card": "https://arxiv.org/abs/2304.02643",
         },
         "kaggle_handle": "kaggle://keras/sam/keras/sam_huge_sa1b/4",
     },
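The three hunks above drop the `official_name` and `model_card` keys from every SAM preset's metadata; the `-N` counts on the other `*_presets.py` files in the listing suggest the same trim was applied library-wide. A minimal sketch of reading what remains, assuming the module layout shown here:

```python
from keras_hub.src.models.sam.sam_presets import backbone_presets

# Each preset now carries only "description", "params", and "path" in its
# metadata, with the Kaggle handle alongside.
for name, preset in backbone_presets.items():
    meta = preset["metadata"]
    print(f"{name}: {meta['params']:,} params -> {preset['kaggle_handle']}")
```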
--- /dev/null
+++ b/keras_hub/src/models/segformer/__init__.py
@@ -0,0 +1,8 @@
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone
+from keras_hub.src.models.segformer.segformer_image_segmenter import (
+    SegFormerImageSegmenter,
+)
+from keras_hub.src.models.segformer.segformer_presets import presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(presets, SegFormerImageSegmenter)
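`register_presets` binds the preset table to the task class; that registration is what lets `from_preset` resolve SegFormer checkpoints by name. A minimal sketch, assuming the nightly build above is installed (weights download from Kaggle on first use and are cached locally):

```python
import keras_hub

segmenter = keras_hub.models.SegFormerImageSegmenter.from_preset(
    "segformer_b0_ade20k_512"
)
```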
--- /dev/null
+++ b/keras_hub/src/models/segformer/segformer_backbone.py
@@ -0,0 +1,163 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.backbone import Backbone
+
+
+@keras_hub_export("keras_hub.models.SegFormerBackbone")
+class SegFormerBackbone(Backbone):
+    """A Keras model implementing the SegFormer architecture for semantic segmentation.
+
+    This class implements the majority of the SegFormer architecture described in
+    [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers]
+    (https://arxiv.org/abs/2105.15203), [based on the TensorFlow implementation from DeepVision]
+    (https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer).
+
+    SegFormers are meant to be used with the MixTransformer (MiT) encoder family,
+    and use a very lightweight all-MLP decoder head.
+
+    The MiT encoder uses a hierarchical transformer which outputs features at multiple scales,
+    similar to the hierarchical outputs typically associated with CNNs.
+
+    Args:
+        image_encoder: `keras.Model`. The backbone network for the model that is
+            used as a feature extractor for the SegFormer encoder.
+            Should be used with the MiT backbone model
+            (`keras_hub.models.MiTBackbone`) which was created
+            specifically for SegFormers.
+        num_classes: int, the number of classes for the detection model,
+            including the background class.
+        projection_filters: int, number of filters in the
+            convolution layer projecting the concatenated features into
+            a segmentation map. Defaults to `256`.
+
+    Example:
+
+    Using the class with a custom `backbone`:
+
+    ```python
+    import keras_hub
+
+    backbone = keras_hub.models.MiTBackbone(
+        depths=[2, 2, 2, 2],
+        image_shape=(224, 224, 3),
+        hidden_dims=[32, 64, 160, 256],
+        num_layers=4,
+        blockwise_num_heads=[1, 2, 5, 8],
+        blockwise_sr_ratios=[8, 4, 2, 1],
+        max_drop_path_rate=0.1,
+        patch_sizes=[7, 3, 3, 3],
+        strides=[4, 2, 2, 2],
+    )
+
+    segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone, projection_filters=256)
+    ```
+
+    Using the class with a preset `backbone`:
+
+    ```python
+    import keras_hub
+
+    backbone = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512")
+    segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone, projection_filters=256)
+    ```
+
+    """
+
+    def __init__(
+        self,
+        image_encoder,
+        projection_filters,
+        **kwargs,
+    ):
+        if not isinstance(image_encoder, keras.layers.Layer) or not isinstance(
+            image_encoder, keras.Model
+        ):
+            raise ValueError(
+                "Argument `image_encoder` must be a `keras.layers.Layer` instance "
+                "or `keras.Model`. Received instead "
+                f"image_encoder={image_encoder} (of type {type(image_encoder)})."
+            )
+
+        # === Layers ===
+        inputs = keras.layers.Input(shape=image_encoder.input.shape[1:])
+
+        self.feature_extractor = keras.Model(
+            image_encoder.inputs, image_encoder.pyramid_outputs
+        )
+
+        features = self.feature_extractor(inputs)
+        # Get height and width of level one output
+        _, height, width, _ = features["P1"].shape
+
+        self.mlp_blocks = []
+
+        for feature_dim, feature in zip(image_encoder.hidden_dims, features):
+            self.mlp_blocks.append(
+                keras.layers.Dense(
+                    projection_filters, name=f"linear_{feature_dim}"
+                )
+            )
+
+        self.resizing = keras.layers.Resizing(
+            height, width, interpolation="bilinear"
+        )
+        self.concat = keras.layers.Concatenate(axis=-1)
+        self.linear_fuse = keras.Sequential(
+            [
+                keras.layers.Conv2D(
+                    filters=projection_filters, kernel_size=1, use_bias=False
+                ),
+                keras.layers.BatchNormalization(epsilon=1e-5, momentum=0.9),
+                keras.layers.Activation("relu"),
+            ]
+        )
+
+        # === Functional Model ===
+        # Project all multi-level outputs onto
+        # the same dimensionality and feature map shape
+        multi_layer_outs = []
+        for index, (feature_dim, feature) in enumerate(
+            zip(image_encoder.hidden_dims, features)
+        ):
+            out = self.mlp_blocks[index](features[feature])
+            out = self.resizing(out)
+            multi_layer_outs.append(out)
+
+        # Concat now-equal feature maps
+        concatenated_outs = self.concat(multi_layer_outs[::-1])
+
+        # Fuse concatenated features into a segmentation map
+        seg = self.linear_fuse(concatenated_outs)
+
+        super().__init__(
+            inputs=inputs,
+            outputs=seg,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.projection_filters = projection_filters
+        self.image_encoder = image_encoder
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "projection_filters": self.projection_filters,
+                "image_encoder": keras.saving.serialize_keras_object(
+                    self.image_encoder
+                ),
+            }
+        )
+        return config
+
+    @classmethod
+    def from_config(cls, config):
+        if "image_encoder" in config and isinstance(
+            config["image_encoder"], dict
+        ):
+            config["image_encoder"] = keras.layers.deserialize(
+                config["image_encoder"]
+            )
+        return super().from_config(config)
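As a sanity check on what the constructor builds: each pyramid level is projected to `projection_filters` channels, resized to the level-one (stride-4) map, concatenated, and fused, so the output should be a single stride-4 feature map. A sketch, assuming the `mit_b0_ade20k_512` preset is available:

```python
import numpy as np
import keras_hub

encoder = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512")
backbone = keras_hub.models.SegFormerBackbone(
    image_encoder=encoder, projection_filters=256
)
features = backbone(np.ones((1, 512, 512, 3)))
print(features.shape)  # expected: (1, 128, 128, 256), i.e. stride 4
```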
--- /dev/null
+++ b/keras_hub/src/models/segformer/segformer_image_converter.py
@@ -0,0 +1,8 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone
+
+
+@keras_hub_export("keras_hub.layers.SegFormerImageConverter")
+class SegFormerImageConverter(ImageConverter):
+    backbone_cls = SegFormerBackbone
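The class body is intentionally empty: it only binds the generic `ImageConverter` resize/rescale machinery to SegFormer checkpoints so that presets resolve the right converter. A hedged sketch of direct construction, assuming the base class's `image_size` argument:

```python
import numpy as np
import keras_hub

converter = keras_hub.layers.SegFormerImageConverter(image_size=(512, 512))
batch = converter(np.zeros((1, 1024, 1024, 3)))  # resized to (1, 512, 512, 3)
```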
--- /dev/null
+++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py
@@ -0,0 +1,171 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_segmenter import ImageSegmenter
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone
+from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import (
+    SegFormerImageSegmenterPreprocessor,
+)
+
+
+@keras_hub_export("keras_hub.models.SegFormerImageSegmenter")
+class SegFormerImageSegmenter(ImageSegmenter):
+    """A Keras model implementing the SegFormer architecture for semantic segmentation.
+
+    This class implements the segmentation head of the SegFormer architecture described in
+    [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers]
+    (https://arxiv.org/abs/2105.15203), [based on the TensorFlow implementation from DeepVision]
+    (https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer).
+
+    SegFormers are meant to be used with the MixTransformer (MiT) encoder family,
+    and use a very lightweight all-MLP decoder head.
+
+    The MiT encoder uses a hierarchical transformer which outputs features at multiple scales,
+    similar to the hierarchical outputs typically associated with CNNs.
+
+    Args:
+        image_encoder: `keras.Model`. The backbone network for the model that is
+            used as a feature extractor for the SegFormer encoder.
+            It is *intended* to be used only with the MiT backbone model
+            (`keras_hub.models.MiTBackbone`) which was created
+            specifically for SegFormers.
+            Alternatively, can be a `keras_hub.models.Backbone`, a model subclassing
+            `keras_hub.models.FeaturePyramidBackbone`, or a `keras.Model`
+            that has a `pyramid_outputs` property which is
+            a dictionary with keys "P2", "P3", "P4", and "P5" and layer names as values.
+        num_classes: int, the number of classes for the detection model,
+            including the background class.
+        projection_filters: int, number of filters in the
+            convolution layer projecting the concatenated features into
+            a segmentation map. Defaults to `256`.
+
+
+    Example:
+
+    Using presets:
+
+    ```python
+    import keras_hub
+    import numpy as np
+
+    segmenter = keras_hub.models.SegFormerImageSegmenter.from_preset("segformer_b0_ade20k_512")
+
+    images = np.random.rand(1, 512, 512, 3)
+    segmenter(images)
+    ```
+
+    Using the SegFormer backbone:
+
+    ```python
+    encoder = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512")
+    backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256)
+    ```
+
+    Using the SegFormer backbone with a custom encoder:
+
+    ```python
+    import keras
+    import keras_hub
+    import numpy as np
+
+    images = np.ones(shape=(1, 96, 96, 3))
+    labels = np.zeros(shape=(1, 96, 96, 1))
+
+    encoder = keras_hub.models.MiTBackbone(
+        depths=[2, 2, 2, 2],
+        image_shape=(96, 96, 3),
+        hidden_dims=[32, 64, 160, 256],
+        num_layers=4,
+        blockwise_num_heads=[1, 2, 5, 8],
+        blockwise_sr_ratios=[8, 4, 2, 1],
+        max_drop_path_rate=0.1,
+        patch_sizes=[7, 3, 3, 3],
+        strides=[4, 2, 2, 2],
+    )
+
+    backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256)
+    segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4)
+
+    segformer(images)
+    ```
+
+    Using the segmenter class with a preset backbone:
+
+    ```python
+    import keras_hub
+
+    image_encoder = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512")
+    backbone = keras_hub.models.SegFormerBackbone(image_encoder=image_encoder, projection_filters=256)
+    segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4)
+    ```
+    """
+
+    backbone_cls = SegFormerBackbone
+    preprocessor_cls = SegFormerImageSegmenterPreprocessor
+
+    def __init__(
+        self,
+        backbone,
+        num_classes,
+        preprocessor=None,
+        **kwargs,
+    ):
+        if not isinstance(backbone, keras.layers.Layer) or not isinstance(
+            backbone, keras.Model
+        ):
+            raise ValueError(
+                "Argument `backbone` must be a `keras.layers.Layer` instance "
+                "or `keras.Model`. Received instead "
+                f"backbone={backbone} (of type {type(backbone)})."
+            )
+
+        # === Layers ===
+        inputs = backbone.input
+
+        self.backbone = backbone
+        self.preprocessor = preprocessor
+        self.dropout = keras.layers.Dropout(0.1)
+        self.output_segmentation_head = keras.layers.Conv2D(
+            filters=num_classes, kernel_size=1, strides=1
+        )
+        self.resizing = keras.layers.Resizing(
+            height=inputs.shape[1],
+            width=inputs.shape[2],
+            interpolation="bilinear",
+        )
+
+        # === Functional Model ===
+        x = self.backbone(inputs)
+        x = self.dropout(x)
+        x = self.output_segmentation_head(x)
+        output = self.resizing(x)
+
+        super().__init__(
+            inputs=inputs,
+            outputs=output,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.num_classes = num_classes
+        self.backbone = backbone
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "num_classes": self.num_classes,
+                "backbone": keras.saving.serialize_keras_object(self.backbone),
+            }
+        )
+        return config
+
+    @classmethod
+    def from_config(cls, config):
+        if "image_encoder" in config and isinstance(
+            config["image_encoder"], dict
+        ):
+            config["image_encoder"] = keras.layers.deserialize(
+                config["image_encoder"]
+            )
+        return super().from_config(config)
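Note that `output_segmentation_head` is a `Conv2D` with no activation, so the model emits per-pixel logits. A minimal fine-tuning sketch built on the custom-encoder docstring example (the optimizer and loss choice are assumptions, not part of the shipped code):

```python
import numpy as np
import keras
import keras_hub

# Small custom-encoder configuration from the docstring above.
encoder = keras_hub.models.MiTBackbone(
    depths=[2, 2, 2, 2],
    image_shape=(96, 96, 3),
    hidden_dims=[32, 64, 160, 256],
    num_layers=4,
    blockwise_num_heads=[1, 2, 5, 8],
    blockwise_sr_ratios=[8, 4, 2, 1],
    max_drop_path_rate=0.1,
    patch_sizes=[7, 3, 3, 3],
    strides=[4, 2, 2, 2],
)
backbone = keras_hub.models.SegFormerBackbone(
    image_encoder=encoder, projection_filters=256
)
segmenter = keras_hub.models.SegFormerImageSegmenter(
    backbone=backbone, num_classes=4
)

# The head emits logits, so use a from-logits loss.
images = np.ones((1, 96, 96, 3), dtype="float32")
labels = np.zeros((1, 96, 96, 1), dtype="int32")
segmenter.compile(
    optimizer="adam",
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
segmenter.fit(images, labels, epochs=1)
```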
--- /dev/null
+++ b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py
@@ -0,0 +1,31 @@
+import keras
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_segmenter_preprocessor import (
+    ImageSegmenterPreprocessor,
+)
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone
+from keras_hub.src.models.segformer.segformer_image_converter import (
+    SegFormerImageConverter,
+)
+from keras_hub.src.utils.tensor_utils import preprocessing_function
+
+IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225]
+
+
+@keras_hub_export("keras_hub.models.SegFormerImageSegmenterPreprocessor")
+class SegFormerImageSegmenterPreprocessor(ImageSegmenterPreprocessor):
+    backbone_cls = SegFormerBackbone
+    image_converter_cls = SegFormerImageConverter
+
+    @preprocessing_function
+    def call(self, x, y=None, sample_weight=None):
+        if self.image_converter:
+            x = self.image_converter(x)
+            y = self.image_converter(y)
+
+        x = x / 255
+        x = (x - IMAGENET_DEFAULT_MEAN) / IMAGENET_DEFAULT_STD
+
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
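The two normalization steps in `call()` are the standard ImageNet recipe: scale pixels to `[0, 1]`, then standardize per channel. The same arithmetic, standalone:

```python
import numpy as np

IMAGENET_DEFAULT_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_DEFAULT_STD = np.array([0.229, 0.224, 0.225])

# A mid-gray image batch, shaped (batch, height, width, channels).
image = np.full((1, 512, 512, 3), 128.0)
x = image / 255                                  # values in [0, 1]
x = (x - IMAGENET_DEFAULT_MEAN) / IMAGENET_DEFAULT_STD
print(x[0, 0, 0])  # approx. [0.074, 0.205, 0.427]
```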
--- /dev/null
+++ b/keras_hub/src/models/segformer/segformer_presets.py
@@ -0,0 +1,124 @@
+"""SegFormer model preset configurations."""
+
+presets = {
+    "segformer_b0_ade20k_512": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB0 backbone fine-tuned on ADE20k in 512x512 resolution."
+            ),
+            "params": 3719027,
+            "path": "segformer_b0",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0_ade20k_512/2",
+    },
+    "segformer_b1_ade20k_512": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB1 backbone fine-tuned on ADE20k in 512x512 resolution."
+            ),
+            "params": 13682643,
+            "path": "segformer_b1",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b1_ade20k_512/2",
+    },
+    "segformer_b2_ade20k_512": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB2 backbone fine-tuned on ADE20k in 512x512 resolution."
+            ),
+            "params": 24727507,
+            "path": "segformer_b2",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b2_ade20k_512/2",
+    },
+    "segformer_b3_ade20k_512": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB3 backbone fine-tuned on ADE20k in 512x512 resolution."
+            ),
+            "params": 44603347,
+            "path": "segformer_b3",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b3_ade20k_512/2",
+    },
+    "segformer_b4_ade20k_512": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB4 backbone fine-tuned on ADE20k in 512x512 resolution."
+            ),
+            "params": 61373907,
+            "path": "segformer_b4",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b4_ade20k_512/2",
+    },
+    "segformer_b5_ade20k_640": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB5 backbone fine-tuned on ADE20k in 640x640 resolution."
+            ),
+            "params": 81974227,
+            "path": "segformer_b5",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b5_ade20k_640/2",
+    },
+    "segformer_b0_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB0 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 3719027,
+            "path": "segformer_b0",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0_cityscapes_1024/2",
+    },
+    "segformer_b1_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB1 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 13682643,
+            "path": "segformer_b1",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b1_ade20k_512/2",
+    },
+    "segformer_b2_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB2 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 24727507,
+            "path": "segformer_b2",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b2_cityscapes_1024/2",
+    },
+    "segformer_b3_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB3 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 44603347,
+            "path": "segformer_b3",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b3_cityscapes_1024/2",
+    },
+    "segformer_b4_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB4 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 61373907,
+            "path": "segformer_b4",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b4_cityscapes_1024/2",
+    },
+    "segformer_b5_cityscapes_1024": {
+        "metadata": {
+            "description": (
+                "SegFormer model with MiTB5 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+            ),
+            "params": 81974227,
+            "path": "segformer_b5",
+        },
+        "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b5_cityscapes_1024/2",
+    },
+}
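The preset table is plain data keyed by preset name; note that `segformer_b1_cityscapes_1024` points at the `segformer_b1_ade20k_512` handle as shipped. A quick inventory sketch over the registered configurations:

```python
from keras_hub.src.models.segformer.segformer_presets import presets

for name, cfg in presets.items():
    print(f"{name}: {cfg['metadata']['params']:,} params")
```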