nnInteractive 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nnInteractive/__init__.py +3 -0
- nnInteractive/inference/__init__.py +0 -0
- nnInteractive/inference/cvpr2025_challenge_baseline/__init__.py +0 -0
- nnInteractive/inference/cvpr2025_challenge_baseline/predict.py +173 -0
- nnInteractive/inference/inference_session.py +1400 -0
- nnInteractive/interaction/__init__.py +0 -0
- nnInteractive/interaction/point.py +166 -0
- nnInteractive/supervoxel/setup.py +4 -0
- nnInteractive/supervoxel/src/metadata.py +118 -0
- nnInteractive/supervoxel/src/reader.py +175 -0
- nnInteractive/supervoxel/src/run.py +136 -0
- nnInteractive/supervoxel/src/sam2/__init__.py +2 -0
- nnInteractive/supervoxel/src/sam2/sam2/__init__.py +11 -0
- nnInteractive/supervoxel/src/sam2/sam2/automatic_mask_generator.py +434 -0
- nnInteractive/supervoxel/src/sam2/sam2/benchmark.py +86 -0
- nnInteractive/supervoxel/src/sam2/sam2/build_sam.py +172 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/hieradet.py +305 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/image_encoder.py +132 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/utils.py +89 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_attention.py +167 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_encoder.py +179 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/position_encoding.py +217 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/mask_decoder.py +274 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/prompt_encoder.py +194 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/transformer.py +293 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_base.py +879 -0
- nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_utils.py +315 -0
- nnInteractive/supervoxel/src/sam2/sam2/sam2_image_predictor.py +433 -0
- nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor.py +1171 -0
- nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor_legacy.py +1125 -0
- nnInteractive/supervoxel/src/sam2/sam2/utils/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/sam2/utils/amg.py +332 -0
- nnInteractive/supervoxel/src/sam2/sam2/utils/misc.py +488 -0
- nnInteractive/supervoxel/src/sam2/sam2/utils/transforms.py +108 -0
- nnInteractive/supervoxel/src/sam2/setup.py +174 -0
- nnInteractive/supervoxel/src/sam2/training/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/sam2_datasets.py +176 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/transforms.py +481 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/utils.py +102 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/vos_dataset.py +154 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/vos_raw_dataset.py +290 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/vos_sampler.py +103 -0
- nnInteractive/supervoxel/src/sam2/training/dataset/vos_segment_loader.py +289 -0
- nnInteractive/supervoxel/src/sam2/training/loss_fns.py +290 -0
- nnInteractive/supervoxel/src/sam2/training/model/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/training/model/sam2.py +515 -0
- nnInteractive/supervoxel/src/sam2/training/optimizer.py +462 -0
- nnInteractive/supervoxel/src/sam2/training/scripts/sav_frame_extraction_submitit.py +157 -0
- nnInteractive/supervoxel/src/sam2/training/train.py +232 -0
- nnInteractive/supervoxel/src/sam2/training/trainer.py +1051 -0
- nnInteractive/supervoxel/src/sam2/training/utils/__init__.py +5 -0
- nnInteractive/supervoxel/src/sam2/training/utils/checkpoint_utils.py +328 -0
- nnInteractive/supervoxel/src/sam2/training/utils/data_utils.py +166 -0
- nnInteractive/supervoxel/src/sam2/training/utils/distributed.py +560 -0
- nnInteractive/supervoxel/src/sam2/training/utils/logger.py +236 -0
- nnInteractive/supervoxel/src/sam2/training/utils/train_utils.py +275 -0
- nnInteractive/supervoxel/src/supervoxel.py +198 -0
- nnInteractive/trainer/__init__.py +0 -0
- nnInteractive/trainer/nnInteractiveTrainer.py +24 -0
- nnInteractive/utils/__init__.py +0 -0
- nnInteractive/utils/bboxes.py +217 -0
- nnInteractive/utils/checkpoint_cleansing.py +9 -0
- nnInteractive/utils/crop.py +268 -0
- nnInteractive/utils/erosion_dilation.py +48 -0
- nnInteractive/utils/inference_helpers.py +45 -0
- nnInteractive/utils/os_shennanigans.py +16 -0
- nnInteractive/utils/rounding.py +13 -0
- nninteractive-2.0.0.dist-info/METADATA +511 -0
- nninteractive-2.0.0.dist-info/RECORD +76 -0
- nninteractive-2.0.0.dist-info/WHEEL +5 -0
- nninteractive-2.0.0.dist-info/licenses/LICENSE +201 -0
- nninteractive-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
|
|
4
|
+
# This source code is licensed under the license found in the
|
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import math
|
|
8
|
+
from copy import deepcopy
|
|
9
|
+
from itertools import product
|
|
10
|
+
from typing import Any, Dict, Generator, ItemsView, List, Tuple
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import torch
|
|
14
|
+
|
|
15
|
+
# Very lightly adapted from https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/utils/amg.py
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MaskData:
    """
    Batched container for masks and the per-mask data that travels with them.

    Each entry is stored under a string key and must be a list, a numpy
    array, or a torch tensor. Entries are filtered along their first
    dimension and concatenated along it as well.
    """

    def __init__(self, **kwargs) -> None:
        """Store every keyword argument as an entry after checking its type."""
        for value in kwargs.values():
            assert isinstance(
                value, (list, np.ndarray, torch.Tensor)
            ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats = dict(**kwargs)

    def __setitem__(self, key: str, item: Any) -> None:
        """Add or overwrite the entry stored under `key`."""
        assert isinstance(
            item, (list, np.ndarray, torch.Tensor)
        ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats[key] = item

    def __delitem__(self, key: str) -> None:
        """Remove the entry stored under `key`."""
        del self._stats[key]

    def __getitem__(self, key: str) -> Any:
        """Return the entry stored under `key`."""
        return self._stats[key]

    def items(self) -> ItemsView[str, Any]:
        """Return a view over the (key, entry) pairs."""
        return self._stats.items()

    def filter(self, keep: torch.Tensor) -> None:
        """
        Keep only the selected elements of every entry, in place.

        `keep` is used either as a boolean mask or as a tensor of integer
        indices; lists pick the interpretation from `keep.dtype`.
        """
        for key in self._stats:
            value = self._stats[key]
            if value is None:
                # Nothing to select from; the entry stays None.
                continue
            if isinstance(value, torch.Tensor):
                selector = torch.as_tensor(keep, device=value.device)
                self._stats[key] = value[selector]
            elif isinstance(value, np.ndarray):
                self._stats[key] = value[keep.detach().cpu().numpy()]
            elif isinstance(value, list):
                if keep.dtype == torch.bool:
                    self._stats[key] = [
                        element for pos, element in enumerate(value) if keep[pos]
                    ]
                else:
                    self._stats[key] = [value[pos] for pos in keep]
            else:
                raise TypeError(f"MaskData key {key} has an unsupported type {type(value)}.")

    def cat(self, new_stats: "MaskData") -> None:
        """
        Append the entries of `new_stats` to the entries held here.

        Keys that are missing (or None) here receive a deep copy of the
        incoming entry; otherwise tensors and arrays are concatenated along
        their first dimension and lists are extended with a deep copy.
        """
        for key, incoming in new_stats.items():
            current = self._stats.get(key)
            if current is None:
                self._stats[key] = deepcopy(incoming)
            elif isinstance(incoming, torch.Tensor):
                self._stats[key] = torch.cat([current, incoming], dim=0)
            elif isinstance(incoming, np.ndarray):
                self._stats[key] = np.concatenate([current, incoming], axis=0)
            elif isinstance(incoming, list):
                self._stats[key] = current + deepcopy(incoming)
            else:
                raise TypeError(f"MaskData key {key} has an unsupported type {type(incoming)}.")

    def to_numpy(self) -> None:
        """Replace every torch tensor entry with a float numpy array, in place."""
        for key in self._stats:
            value = self._stats[key]
            if not isinstance(value, torch.Tensor):
                continue
            self._stats[key] = value.float().detach().cpu().numpy()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_box_near_crop_edge(
    boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
) -> torch.Tensor:
    """
    Flag boxes that touch an edge of the crop without touching the image edge.

    The boxes are first shifted by the crop origin. A box is flagged when any
    of its coordinates lies within `atol` of the matching `crop_box`
    coordinate while not lying within `atol` of the matching `orig_box`
    coordinate. Returns one boolean per row of `boxes`.
    """
    device = boxes.device
    crop_row = torch.as_tensor(crop_box, dtype=torch.float, device=device)[None, :]
    image_row = torch.as_tensor(orig_box, dtype=torch.float, device=device)[None, :]

    # Move the boxes from crop coordinates into image coordinates.
    shifted = uncrop_boxes_xyxy(boxes, crop_box).float()

    # rtol=0 makes the comparison a pure absolute-distance test.
    touches_crop = torch.isclose(shifted, crop_row, atol=atol, rtol=0)
    touches_image = torch.isclose(shifted, image_row, atol=atol, rtol=0)

    flagged = torch.logical_and(touches_crop, ~touches_image)
    return flagged.any(dim=1)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
    """
    Convert boxes from XYXY (x0, y0, x1, y1) to XYWH (x0, y0, width, height).

    Args:
        box_xyxy: A tensor whose last dimension holds the four XYXY
            coordinates, i.e. a single box of shape (4,) or a batch of shape
            (..., 4). A plain indexable sequence holding one box is also
            accepted.

    Returns:
        A new object of the same kind and shape as the input with the last
        two coordinates replaced by width and height. The input is left
        untouched.
    """
    if isinstance(box_xyxy, torch.Tensor):
        # clone() (unlike deepcopy) also works on non-leaf autograd tensors,
        # and indexing the last dimension handles batched input.
        box_xywh = box_xyxy.clone()
        box_xywh[..., 2] = box_xyxy[..., 2] - box_xyxy[..., 0]
        box_xywh[..., 3] = box_xyxy[..., 3] - box_xyxy[..., 1]
        return box_xywh

    # Non-tensor input: keep the single-box behaviour on a deep copy.
    box_xywh = deepcopy(box_xyxy)
    box_xywh[2] = box_xywh[2] - box_xywh[0]
    box_xywh[3] = box_xywh[3] - box_xywh[1]
    return box_xywh
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
    """
    Yield aligned slices of several equally long sequences.

    Each yielded list holds, for every input in `args`, the slice covering
    the next `batch_size` elements; the final batch may be shorter.
    """
    same_length = len(args) > 0 and all(len(seq) == len(args[0]) for seq in args)
    assert same_length, "Batched iteration must have inputs of all the same size."

    # Ceiling division: one extra batch when the length is not a multiple.
    full_batches, leftover = divmod(len(args[0]), batch_size)
    n_batches = full_batches + (1 if leftover != 0 else 0)

    for batch_idx in range(n_batches):
        lo = batch_idx * batch_size
        hi = (batch_idx + 1) * batch_size
        yield [seq[lo:hi] for seq in args]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
    """
    Encode a batch of masks as uncompressed run-length encodings.

    The input is unpacked as (batch, height, width). Each output dict has a
    "size" of [height, width] and a "counts" list of alternating run lengths
    over the column-major flattened mask, starting with the run of zeros.
    """
    n_masks, height, width = tensor.shape

    # Column-major (Fortran) order: swap H and W, then merge them.
    flat = tensor.permute(0, 2, 1).flatten(1)

    # A transition sits wherever two neighbouring pixels differ.
    transitions = (flat[:, 1:] ^ flat[:, :-1]).nonzero()

    encoded = []
    for mask_idx in range(n_masks):
        run_ends = transitions[transitions[:, 0] == mask_idx, 1]
        first = torch.tensor([0], dtype=run_ends.dtype, device=run_ends.device)
        last = torch.tensor([height * width], dtype=run_ends.dtype, device=run_ends.device)
        boundaries = torch.cat([first, run_ends + 1, last])
        run_lengths = boundaries[1:] - boundaries[:-1]

        # The first count always describes zeros, so a mask that opens with a
        # set pixel gets an explicit zero-length run in front.
        if flat[mask_idx, 0] == 0:
            counts = []
        else:
            counts = [0]
        counts.extend(run_lengths.detach().cpu().tolist())
        encoded.append({"size": [height, width], "counts": counts})
    return encoded
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """
    Decode an uncompressed RLE dict into a boolean mask of shape (h, w).

    `rle["counts"]` is read as alternating run lengths over the column-major
    flattened mask, the first run being False.
    """
    height, width = rle["size"]
    flat = np.empty(height * width, dtype=bool)

    start = 0
    for run_idx, run_len in enumerate(rle["counts"]):
        stop = start + run_len
        # Even-numbered runs are background, odd-numbered runs are foreground.
        flat[start:stop] = run_idx % 2 == 1
        start = stop

    # The runs are column-major: lay them out as (w, h) and transpose back.
    return flat.reshape(width, height).transpose()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def area_from_rle(rle: Dict[str, Any]) -> int:
    """Return the sum of the odd-positioned entries of `rle["counts"]`."""
    counts = rle["counts"]
    # Counts alternate between the two pixel values; positions 1, 3, 5, ...
    # are the ones added up here.
    return sum(count for position, count in enumerate(counts) if position % 2 == 1)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
    """
    Compute a stability score for each mask in a batch.

    The score is the IoU between the mask thresholded at
    `mask_threshold + threshold_offset` and the mask thresholded at
    `mask_threshold - threshold_offset`. Since the higher-threshold mask is
    contained in the lower-threshold one, the IoU is simply the ratio of
    their pixel counts over the last two dimensions.
    """

    def pixels_above(cutoff: float) -> torch.Tensor:
        # Narrow accumulator dtypes avoid an intermediate cast to torch.int64.
        per_row = (masks > cutoff).sum(-1, dtype=torch.int16)
        return per_row.sum(-1, dtype=torch.int32)

    intersections = pixels_above(mask_threshold + threshold_offset)
    unions = pixels_above(mask_threshold - threshold_offset)
    return intersections / unions
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def build_point_grid(n_per_side: int) -> np.ndarray:
    """
    Build an evenly spaced n_per_side x n_per_side grid of points in [0,1]x[0,1].

    Returns an array of shape (n_per_side**2, 2) holding (x, y) pairs, with x
    varying fastest. Points sit at cell centres, half a step in from the
    border.
    """
    half_step = 1 / (2 * n_per_side)
    axis = np.linspace(half_step, 1 - half_step, n_per_side)
    # Default "xy" indexing: grid_x varies along columns, grid_y along rows.
    grid_x, grid_y = np.meshgrid(axis, axis)
    return np.stack([grid_x, grid_y], axis=-1).reshape(-1, 2)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> List[np.ndarray]:
    """
    Build one point grid per crop layer.

    Layer i (for i in 0..n_layers inclusive) gets a grid with
    int(n_per_side / scale_per_layer**i) points per side.
    """
    return [
        build_point_grid(int(n_per_side / (scale_per_layer**layer)))
        for layer in range(n_layers + 1)
    ]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def generate_crop_boxes(
    im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
) -> Tuple[List[List[int]], List[int]]:
    """
    Build the crop boxes for every layer of a multi-scale crop pyramid.

    Layer 0 is the whole image; layer i (i >= 1) splits each side into 2**i
    overlapping crops, giving (2**i)**2 boxes. `im_size` is unpacked as
    (height, width).

    Returns:
        A pair (crop_boxes, layer_idxs): the boxes as [x0, y0, x1, y1] lists
        and, for each box, the index of the layer it belongs to.
    """
    im_h, im_w = im_size
    short_side = min(im_h, im_w)

    # Layer 0: the original image.
    crop_boxes = [[0, 0, im_w, im_h]]
    layer_idxs = [0]

    for layer in range(1, n_layers + 1):
        per_side = 2**layer
        overlap = int(overlap_ratio * short_side * (2 / per_side))

        # Crop length such that `per_side` crops, each overlapping its
        # neighbour by `overlap`, cover the full side.
        crop_w = int(math.ceil((overlap * (per_side - 1) + im_w) / per_side))
        crop_h = int(math.ceil((overlap * (per_side - 1) + im_h) / per_side))

        lefts = [int((crop_w - overlap) * k) for k in range(per_side)]
        tops = [int((crop_h - overlap) * k) for k in range(per_side)]

        # Boxes are XYXY, clipped to the image bounds.
        for left in lefts:
            for top in tops:
                right = min(left + crop_w, im_w)
                bottom = min(top + crop_h, im_h)
                crop_boxes.append([left, top, right, bottom])
                layer_idxs.append(layer)

    return crop_boxes, layer_idxs
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    """
    Shift XYXY boxes by the origin (first two entries) of `crop_box`.

    `boxes` may be 2-D or carry an extra middle (channel) dimension; the
    offset is broadcast accordingly.
    """
    left, top, _, _ = crop_box
    shift = torch.tensor([[left, top, left, top]], device=boxes.device)
    if boxes.dim() == 3:
        # Insert a singleton axis so the shift broadcasts over channels.
        shift = shift[:, None, :]
    return boxes + shift
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    """
    Shift (x, y) points by the origin (first two entries) of `crop_box`.

    `points` may be 2-D or carry an extra middle (channel) dimension; the
    offset is broadcast accordingly.
    """
    left, top, _, _ = crop_box
    shift = torch.tensor([[left, top]], device=points.device)
    if points.dim() == 3:
        # Insert a singleton axis so the shift broadcasts over channels.
        shift = shift[:, None, :]
    return points + shift
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int) -> torch.Tensor:
    """
    Zero-pad cropped masks back to the size of the original image.

    `crop_box` is [x0, y0, x1, y1]. When the crop already spans the whole
    image the input is returned unchanged; otherwise the last two dimensions
    are padded so the crop lands at its original position.
    """
    x0, y0, x1, y1 = crop_box
    spans_whole_image = x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h
    if spans_whole_image:
        return masks

    # Padding order for F.pad is (left, right, top, bottom) on the last two dims.
    pad_right = orig_w - x1
    pad_bottom = orig_h - y1
    return torch.nn.functional.pad(masks, (x0, pad_right, y0, pad_bottom), value=0)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tuple[np.ndarray, bool]:
    """
    Remove small connected regions from a mask.

    With mode "holes", small background regions inside the mask are filled;
    with mode "islands", small foreground regions are dropped. Regions are
    8-connected and "small" means an area below `area_thresh`.

    Returns:
        The (possibly new) mask and a flag telling whether it was modified.
    """
    import cv2  # type: ignore

    assert mode in ["holes", "islands"]
    correct_holes = mode == "holes"

    # For holes the mask is inverted so the holes become the labelled regions.
    working_mask = (correct_holes ^ mask).astype(np.uint8)
    n_labels, regions, stats, _ = cv2.connectedComponentsWithStats(working_mask, 8)

    # Last stats column, skipping row 0 (the background label).
    sizes = stats[1:, -1]
    small_regions = [label for label, size in enumerate(sizes, start=1) if size < area_thresh]
    if not small_regions:
        return mask, False

    if correct_holes:
        # Result = original foreground (label 0 of the inverted mask) + small holes.
        fill_labels = [0] + small_regions
    else:
        dropped = {0, *small_regions}
        fill_labels = [label for label in range(n_labels) if label not in dropped]
        # If every region is below threshold, keep the largest one.
        if not fill_labels:
            fill_labels = [int(np.argmax(sizes)) + 1]

    return np.isin(regions, fill_labels), True
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert an uncompressed RLE dict into pycocotools' encoded RLE.

    The encoded "counts" field is decoded from bytes to a UTF-8 string so the
    result can be serialized with json.
    """
    from pycocotools import mask as mask_utils  # type: ignore

    height, width = uncompressed_rle["size"]
    encoded = mask_utils.frPyObjects(uncompressed_rle, height, width)
    encoded["counts"] = encoded["counts"].decode("utf-8")  # Necessary to serialize with json
    return encoded
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
    """
    Compute XYXY bounding boxes around masks.

    An empty mask yields [0, 0, 0, 0]. For an input of shape
    C1xC2x...xHxW the output has shape C1xC2x...x4.
    """
    orig_shape = masks.shape
    lead_dims = orig_shape[:-2]

    # The max reductions below raise on empty inputs, so bail out early.
    if masks.numel() == 0:
        return torch.zeros(*lead_dims, 4, device=masks.device)

    height, width = orig_shape[-2:]
    has_batch_dims = len(orig_shape) > 2
    # Work on a uniform CxHxW layout.
    flat = masks.flatten(0, -3) if has_batch_dims else masks.unsqueeze(0)

    def axis_extent(occupied: torch.Tensor, length: int):
        """Return (lowest, highest) occupied index along the last axis."""
        index_row = torch.arange(length, device=occupied.device)[None, :]
        coords = occupied * index_row
        highest = coords.max(dim=-1).values
        # Unoccupied slots are pushed to `length` so they never win the min.
        lowest = (coords + length * (~occupied)).min(dim=-1).values
        return lowest, highest

    # Rows that contain any mask pixel give the top and bottom edges.
    top_edges, bottom_edges = axis_extent(flat.max(dim=-1).values, height)
    # Columns that contain any mask pixel give the left and right edges.
    left_edges, right_edges = axis_extent(flat.max(dim=-2).values, width)

    # For an empty mask the high edge ends up before the low edge; zero those boxes.
    is_empty = (right_edges < left_edges) | (bottom_edges < top_edges)
    boxes = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
    boxes = boxes * (~is_empty).unsqueeze(-1)

    # Restore the caller's leading dimensions.
    if has_batch_dims:
        return boxes.reshape(*lead_dims, 4)
    return boxes[0]
|