sleap-nn 0.1.0__py3-none-any.whl → 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. sleap_nn/__init__.py +2 -4
  2. sleap_nn/architectures/convnext.py +0 -5
  3. sleap_nn/architectures/encoder_decoder.py +6 -25
  4. sleap_nn/architectures/swint.py +0 -8
  5. sleap_nn/cli.py +60 -364
  6. sleap_nn/config/data_config.py +5 -11
  7. sleap_nn/config/get_config.py +4 -10
  8. sleap_nn/config/trainer_config.py +0 -76
  9. sleap_nn/data/augmentation.py +241 -50
  10. sleap_nn/data/custom_datasets.py +39 -411
  11. sleap_nn/data/instance_cropping.py +1 -1
  12. sleap_nn/data/resizing.py +2 -2
  13. sleap_nn/data/utils.py +17 -135
  14. sleap_nn/evaluation.py +22 -81
  15. sleap_nn/inference/bottomup.py +20 -86
  16. sleap_nn/inference/peak_finding.py +19 -88
  17. sleap_nn/inference/predictors.py +117 -224
  18. sleap_nn/legacy_models.py +11 -65
  19. sleap_nn/predict.py +9 -37
  20. sleap_nn/train.py +4 -74
  21. sleap_nn/training/callbacks.py +105 -1046
  22. sleap_nn/training/lightning_modules.py +65 -602
  23. sleap_nn/training/model_trainer.py +184 -211
  24. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a0.dist-info}/METADATA +3 -15
  25. sleap_nn-0.1.0a0.dist-info/RECORD +65 -0
  26. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a0.dist-info}/WHEEL +1 -1
  27. sleap_nn/data/skia_augmentation.py +0 -414
  28. sleap_nn/export/__init__.py +0 -21
  29. sleap_nn/export/cli.py +0 -1778
  30. sleap_nn/export/exporters/__init__.py +0 -51
  31. sleap_nn/export/exporters/onnx_exporter.py +0 -80
  32. sleap_nn/export/exporters/tensorrt_exporter.py +0 -291
  33. sleap_nn/export/metadata.py +0 -225
  34. sleap_nn/export/predictors/__init__.py +0 -63
  35. sleap_nn/export/predictors/base.py +0 -22
  36. sleap_nn/export/predictors/onnx.py +0 -154
  37. sleap_nn/export/predictors/tensorrt.py +0 -312
  38. sleap_nn/export/utils.py +0 -307
  39. sleap_nn/export/wrappers/__init__.py +0 -25
  40. sleap_nn/export/wrappers/base.py +0 -96
  41. sleap_nn/export/wrappers/bottomup.py +0 -243
  42. sleap_nn/export/wrappers/bottomup_multiclass.py +0 -195
  43. sleap_nn/export/wrappers/centered_instance.py +0 -56
  44. sleap_nn/export/wrappers/centroid.py +0 -58
  45. sleap_nn/export/wrappers/single_instance.py +0 -83
  46. sleap_nn/export/wrappers/topdown.py +0 -180
  47. sleap_nn/export/wrappers/topdown_multiclass.py +0 -304
  48. sleap_nn/inference/postprocessing.py +0 -284
  49. sleap_nn/training/schedulers.py +0 -191
  50. sleap_nn-0.1.0.dist-info/RECORD +0 -88
  51. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a0.dist-info}/entry_points.txt +0 -0
  52. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a0.dist-info}/licenses/LICENSE +0 -0
  53. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a0.dist-info}/top_level.txt +0 -0
@@ -1,304 +0,0 @@
1
- """ONNX wrapper for top-down multiclass (supervised ID) models."""
2
-
3
- from typing import Dict
4
-
5
- import torch
6
- import torch.nn as nn
7
- import torch.nn.functional as F
8
-
9
- from sleap_nn.export.wrappers.base import BaseExportWrapper
10
-
11
-
12
- class TopDownMultiClassONNXWrapper(BaseExportWrapper):
13
- """ONNX-exportable wrapper for top-down multiclass (supervised ID) models.
14
-
15
- This wrapper handles models that output both confidence maps for keypoint
16
- detection and class logits for identity classification. It runs on instance
17
- crops (centered around detected centroids).
18
-
19
- Expects input images as uint8 tensors in [0, 255].
20
-
21
- Attributes:
22
- model: The underlying PyTorch model (centered instance + class vectors heads).
23
- output_stride: Output stride of the confmap head.
24
- input_scale: Scale factor applied to input images before inference.
25
- n_classes: Number of identity classes.
26
- """
27
-
28
- def __init__(
29
- self,
30
- model: nn.Module,
31
- output_stride: int = 2,
32
- input_scale: float = 1.0,
33
- n_classes: int = 2,
34
- ):
35
- """Initialize the wrapper.
36
-
37
- Args:
38
- model: The underlying PyTorch model.
39
- output_stride: Output stride of the confidence maps.
40
- input_scale: Scale factor for input images.
41
- n_classes: Number of identity classes (e.g., 2 for male/female).
42
- """
43
- super().__init__(model)
44
- self.output_stride = output_stride
45
- self.input_scale = input_scale
46
- self.n_classes = n_classes
47
-
48
- def forward(self, image: torch.Tensor) -> Dict[str, torch.Tensor]:
49
- """Run top-down multiclass inference on crops.
50
-
51
- Args:
52
- image: Input image tensor of shape (batch, channels, height, width).
53
- Expected to be uint8 in [0, 255].
54
-
55
- Returns:
56
- Dictionary with keys:
57
- - "peaks": Predicted peak coordinates (batch, n_nodes, 2) in (x, y).
58
- - "peak_vals": Peak confidence values (batch, n_nodes).
59
- - "class_logits": Raw class logits (batch, n_classes).
60
-
61
- The class assignment is done on CPU using Hungarian matching
62
- via `get_class_inds_from_vectors()`.
63
- """
64
- # Normalize uint8 [0, 255] to float32 [0, 1]
65
- image = self._normalize_uint8(image)
66
-
67
- # Apply input scaling if needed
68
- if self.input_scale != 1.0:
69
- height = int(image.shape[-2] * self.input_scale)
70
- width = int(image.shape[-1] * self.input_scale)
71
- image = F.interpolate(
72
- image, size=(height, width), mode="bilinear", align_corners=False
73
- )
74
-
75
- # Forward pass
76
- out = self.model(image)
77
-
78
- # Extract outputs
79
- confmaps = self._extract_tensor(out, ["centered", "instance", "confmap"])
80
- class_logits = self._extract_tensor(out, ["class", "vector"])
81
-
82
- # Find global peaks (one per node)
83
- peaks, peak_vals = self._find_global_peaks(confmaps)
84
-
85
- # Scale peaks back to input coordinates
86
- peaks = peaks * (self.output_stride / self.input_scale)
87
-
88
- return {
89
- "peaks": peaks,
90
- "peak_vals": peak_vals,
91
- "class_logits": class_logits,
92
- }
93
-
94
-
95
- class TopDownMultiClassCombinedONNXWrapper(BaseExportWrapper):
96
- """ONNX-exportable wrapper for combined centroid + multiclass instance models.
97
-
98
- This wrapper combines a centroid detection model with a centered instance
99
- multiclass model. It performs:
100
- 1. Centroid detection on full images
101
- 2. Cropping around each centroid using vectorized grid_sample
102
- 3. Instance keypoint detection + identity classification on each crop
103
-
104
- Expects input images as uint8 tensors in [0, 255].
105
- """
106
-
107
- def __init__(
108
- self,
109
- centroid_model: nn.Module,
110
- instance_model: nn.Module,
111
- max_instances: int = 20,
112
- crop_size: tuple = (192, 192),
113
- centroid_output_stride: int = 4,
114
- instance_output_stride: int = 2,
115
- centroid_input_scale: float = 1.0,
116
- instance_input_scale: float = 1.0,
117
- n_nodes: int = 13,
118
- n_classes: int = 2,
119
- ):
120
- """Initialize the combined wrapper.
121
-
122
- Args:
123
- centroid_model: Model for centroid detection.
124
- instance_model: Model for instance keypoints + class prediction.
125
- max_instances: Maximum number of instances to detect.
126
- crop_size: Size of crops around centroids (height, width).
127
- centroid_output_stride: Output stride of centroid model.
128
- instance_output_stride: Output stride of instance model.
129
- centroid_input_scale: Input scale for centroid model.
130
- instance_input_scale: Input scale for instance model.
131
- n_nodes: Number of keypoint nodes per instance.
132
- n_classes: Number of identity classes.
133
- """
134
- super().__init__(centroid_model) # Primary model is centroid
135
- self.instance_model = instance_model
136
- self.max_instances = max_instances
137
- self.crop_size = crop_size
138
- self.centroid_output_stride = centroid_output_stride
139
- self.instance_output_stride = instance_output_stride
140
- self.centroid_input_scale = centroid_input_scale
141
- self.instance_input_scale = instance_input_scale
142
- self.n_nodes = n_nodes
143
- self.n_classes = n_classes
144
-
145
- # Pre-compute base grid for crop extraction (same as TopDownONNXWrapper)
146
- crop_h, crop_w = crop_size
147
- y_crop = torch.linspace(-1, 1, crop_h, dtype=torch.float32)
148
- x_crop = torch.linspace(-1, 1, crop_w, dtype=torch.float32)
149
- grid_y, grid_x = torch.meshgrid(y_crop, x_crop, indexing="ij")
150
- base_grid = torch.stack([grid_x, grid_y], dim=-1)
151
- self.register_buffer("base_grid", base_grid, persistent=False)
152
-
153
- def forward(self, image: torch.Tensor) -> Dict[str, torch.Tensor]:
154
- """Run combined top-down multiclass inference.
155
-
156
- Args:
157
- image: Input image tensor of shape (batch, channels, height, width).
158
- Expected to be uint8 in [0, 255].
159
-
160
- Returns:
161
- Dictionary with keys:
162
- - "centroids": Detected centroids (batch, max_instances, 2).
163
- - "centroid_vals": Centroid confidence values (batch, max_instances).
164
- - "peaks": Instance peaks (batch, max_instances, n_nodes, 2).
165
- - "peak_vals": Peak values (batch, max_instances, n_nodes).
166
- - "class_logits": Class logits per instance (batch, max_instances, n_classes).
167
- - "instance_valid": Validity mask (batch, max_instances).
168
- """
169
- # Normalize input
170
- image = self._normalize_uint8(image)
171
- batch_size, channels, height, width = image.shape
172
-
173
- # Apply centroid input scaling
174
- scaled_image = image
175
- if self.centroid_input_scale != 1.0:
176
- scaled_h = int(height * self.centroid_input_scale)
177
- scaled_w = int(width * self.centroid_input_scale)
178
- scaled_image = F.interpolate(
179
- scaled_image,
180
- size=(scaled_h, scaled_w),
181
- mode="bilinear",
182
- align_corners=False,
183
- )
184
-
185
- # Centroid detection
186
- centroid_out = self.model(scaled_image)
187
- centroid_cms = self._extract_tensor(centroid_out, ["centroid", "confmap"])
188
- centroids, centroid_vals, instance_valid = self._find_topk_peaks(
189
- centroid_cms, self.max_instances
190
- )
191
- centroids = centroids * (
192
- self.centroid_output_stride / self.centroid_input_scale
193
- )
194
-
195
- # Extract crops using vectorized grid_sample (same as TopDownONNXWrapper)
196
- crops = self._extract_crops(image, centroids)
197
- crops_flat = crops.reshape(
198
- batch_size * self.max_instances,
199
- channels,
200
- self.crop_size[0],
201
- self.crop_size[1],
202
- )
203
-
204
- # Apply instance input scaling if needed
205
- if self.instance_input_scale != 1.0:
206
- scaled_h = int(self.crop_size[0] * self.instance_input_scale)
207
- scaled_w = int(self.crop_size[1] * self.instance_input_scale)
208
- crops_flat = F.interpolate(
209
- crops_flat,
210
- size=(scaled_h, scaled_w),
211
- mode="bilinear",
212
- align_corners=False,
213
- )
214
-
215
- # Instance model forward (batch all crops)
216
- instance_out = self.instance_model(crops_flat)
217
- instance_cms = self._extract_tensor(
218
- instance_out, ["centered", "instance", "confmap"]
219
- )
220
- instance_class = self._extract_tensor(instance_out, ["class", "vector"])
221
-
222
- # Find peaks in all crops
223
- crop_peaks, crop_peak_vals = self._find_global_peaks(instance_cms)
224
- crop_peaks = crop_peaks * (
225
- self.instance_output_stride / self.instance_input_scale
226
- )
227
-
228
- # Reshape to batch x instances x nodes x 2
229
- crop_peaks = crop_peaks.reshape(batch_size, self.max_instances, self.n_nodes, 2)
230
- peak_vals = crop_peak_vals.reshape(batch_size, self.max_instances, self.n_nodes)
231
-
232
- # Reshape class logits
233
- class_logits = instance_class.reshape(
234
- batch_size, self.max_instances, self.n_classes
235
- )
236
-
237
- # Transform peaks from crop coordinates to full image coordinates
238
- crop_offset = centroids.unsqueeze(2) - image.new_tensor(
239
- [self.crop_size[1] / 2.0, self.crop_size[0] / 2.0]
240
- )
241
- peaks = crop_peaks + crop_offset
242
-
243
- # Zero out invalid instances
244
- invalid_mask = ~instance_valid
245
- centroids = centroids.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
246
- centroid_vals = centroid_vals.masked_fill(invalid_mask, 0.0)
247
- peaks = peaks.masked_fill(invalid_mask.unsqueeze(-1).unsqueeze(-1), 0.0)
248
- peak_vals = peak_vals.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
249
- class_logits = class_logits.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
250
-
251
- return {
252
- "centroids": centroids,
253
- "centroid_vals": centroid_vals,
254
- "peaks": peaks,
255
- "peak_vals": peak_vals,
256
- "class_logits": class_logits,
257
- "instance_valid": instance_valid,
258
- }
259
-
260
- def _extract_crops(
261
- self,
262
- image: torch.Tensor,
263
- centroids: torch.Tensor,
264
- ) -> torch.Tensor:
265
- """Extract crops around centroids using grid_sample.
266
-
267
- This is the same vectorized implementation as TopDownONNXWrapper.
268
- """
269
- batch_size, channels, height, width = image.shape
270
- crop_h, crop_w = self.crop_size
271
- n_instances = centroids.shape[1]
272
-
273
- scale_x = crop_w / width
274
- scale_y = crop_h / height
275
- scale = image.new_tensor([scale_x, scale_y])
276
- base_grid = self.base_grid.to(device=image.device, dtype=image.dtype)
277
- scaled_grid = base_grid * scale
278
-
279
- scaled_grid = scaled_grid.unsqueeze(0).unsqueeze(0)
280
- scaled_grid = scaled_grid.expand(batch_size, n_instances, -1, -1, -1)
281
-
282
- norm_centroids = torch.zeros_like(centroids)
283
- norm_centroids[..., 0] = (centroids[..., 0] / (width - 1)) * 2 - 1
284
- norm_centroids[..., 1] = (centroids[..., 1] / (height - 1)) * 2 - 1
285
- offset = norm_centroids.unsqueeze(2).unsqueeze(2)
286
-
287
- sample_grid = scaled_grid + offset
288
-
289
- image_expanded = image.unsqueeze(1).expand(-1, n_instances, -1, -1, -1)
290
- image_flat = image_expanded.reshape(
291
- batch_size * n_instances, channels, height, width
292
- )
293
- grid_flat = sample_grid.reshape(batch_size * n_instances, crop_h, crop_w, 2)
294
-
295
- crops_flat = F.grid_sample(
296
- image_flat,
297
- grid_flat,
298
- mode="bilinear",
299
- padding_mode="zeros",
300
- align_corners=True,
301
- )
302
-
303
- crops = crops_flat.reshape(batch_size, n_instances, channels, crop_h, crop_w)
304
- return crops
@@ -1,284 +0,0 @@
1
- """Inference-level postprocessing filters for pose predictions.
2
-
3
- This module provides filters that run after model inference but before tracking.
4
- These filters are independent of tracking configuration and can be used standalone.
5
- """
6
-
7
- from typing import List, Literal
8
-
9
- import numpy as np
10
- import sleap_io as sio
11
-
12
-
13
- def filter_overlapping_instances(
14
- labels: sio.Labels,
15
- threshold: float = 0.8,
16
- method: Literal["iou", "oks"] = "iou",
17
- ) -> sio.Labels:
18
- """Filter overlapping instances using greedy non-maximum suppression.
19
-
20
- Removes duplicate/overlapping instances by applying greedy NMS based on
21
- either bounding box IOU or Object Keypoint Similarity (OKS). When two
22
- instances overlap above the threshold, the lower-scoring one is removed.
23
-
24
- This filter runs independently of tracking and can be used to clean up
25
- model outputs before saving or further processing.
26
-
27
- Args:
28
- labels: Labels object with predicted instances to filter.
29
- threshold: Similarity threshold for considering instances as overlapping.
30
- Instances with similarity > threshold are candidates for removal.
31
- Lower values are more aggressive (remove more).
32
- Typical values: 0.3 (aggressive) to 0.8 (permissive).
33
- method: Similarity metric to use for comparing instances.
34
- "iou": Bounding box intersection-over-union.
35
- "oks": Object Keypoint Similarity (pose-based).
36
-
37
- Returns:
38
- The input Labels object with overlapping instances removed.
39
- Modification is done in place, but the object is also returned
40
- for convenience.
41
-
42
- Example:
43
- >>> # Filter instances with >80% bounding box overlap
44
- >>> labels = filter_overlapping_instances(labels, threshold=0.8, method="iou")
45
- >>> # Filter using OKS similarity
46
- >>> labels = filter_overlapping_instances(labels, threshold=0.5, method="oks")
47
-
48
- Note:
49
- - Only affects frames with 2+ predicted instances
50
- - Uses instance.score for ranking; higher scores are preferred
51
- - For IOU: bounding boxes computed from non-NaN keypoints
52
- - For OKS: uses standard COCO OKS formula with bbox-derived scale
53
- """
54
- for lf in labels.labeled_frames:
55
- if len(lf.instances) <= 1:
56
- continue
57
-
58
- # Separate predicted instances (have scores) from other instances
59
- predicted = []
60
- other = []
61
- for inst in lf.instances:
62
- if isinstance(inst, sio.PredictedInstance):
63
- predicted.append(inst)
64
- else:
65
- other.append(inst)
66
-
67
- # Only filter predicted instances
68
- if len(predicted) <= 1:
69
- continue
70
-
71
- # Get scores
72
- scores = np.array([_instance_score(inst) for inst in predicted])
73
-
74
- # Apply greedy NMS with selected method
75
- if method == "iou":
76
- bboxes = np.array([_instance_bbox(inst) for inst in predicted])
77
- keep_indices = _nms_greedy_iou(bboxes, scores, threshold)
78
- elif method == "oks":
79
- points = [inst.numpy() for inst in predicted]
80
- keep_indices = _nms_greedy_oks(points, scores, threshold)
81
- else:
82
- raise ValueError(f"Unknown method: {method}. Use 'iou' or 'oks'.")
83
-
84
- # Reconstruct instance list: kept predicted + other instances
85
- kept_predicted = [predicted[i] for i in keep_indices]
86
- lf.instances = kept_predicted + other
87
-
88
- return labels
89
-
90
-
91
- def _instance_bbox(instance: sio.PredictedInstance) -> np.ndarray:
92
- """Compute axis-aligned bounding box from instance keypoints.
93
-
94
- Args:
95
- instance: Instance with keypoints.
96
-
97
- Returns:
98
- Bounding box as [xmin, ymin, xmax, ymax].
99
- Returns [0, 0, 0, 0] if no valid keypoints.
100
- """
101
- pts = instance.numpy() # (n_nodes, 2)
102
- valid = ~np.isnan(pts).any(axis=1)
103
-
104
- if not valid.any():
105
- return np.array([0.0, 0.0, 0.0, 0.0])
106
-
107
- pts = pts[valid]
108
- return np.array(
109
- [pts[:, 0].min(), pts[:, 1].min(), pts[:, 0].max(), pts[:, 1].max()]
110
- )
111
-
112
-
113
- def _instance_score(instance: sio.PredictedInstance) -> float:
114
- """Get instance confidence score.
115
-
116
- Args:
117
- instance: Predicted instance.
118
-
119
- Returns:
120
- Instance score, or 1.0 if not available.
121
- """
122
- return getattr(instance, "score", 1.0)
123
-
124
-
125
- def _nms_greedy_iou(
126
- bboxes: np.ndarray,
127
- scores: np.ndarray,
128
- threshold: float,
129
- ) -> List[int]:
130
- """Apply greedy NMS using bounding box IOU.
131
-
132
- Args:
133
- bboxes: Bounding boxes of shape (N, 4) as [xmin, ymin, xmax, ymax].
134
- scores: Confidence scores of shape (N,).
135
- threshold: IOU threshold for suppression.
136
-
137
- Returns:
138
- List of indices to keep, in order of decreasing score.
139
- """
140
- if len(bboxes) == 0:
141
- return []
142
-
143
- # Sort by score descending
144
- order = scores.argsort()[::-1].tolist()
145
-
146
- keep = []
147
- while order:
148
- # Take highest scoring remaining instance
149
- i = order.pop(0)
150
- keep.append(i)
151
-
152
- if not order:
153
- break
154
-
155
- # Compute IOU with all remaining instances
156
- remaining_indices = np.array(order)
157
- similarities = _compute_iou_one_to_many(bboxes[i], bboxes[remaining_indices])
158
-
159
- # Keep only instances with similarity <= threshold
160
- mask = similarities <= threshold
161
- order = [order[j] for j in range(len(order)) if mask[j]]
162
-
163
- return keep
164
-
165
-
166
- def _nms_greedy_oks(
167
- points_list: List[np.ndarray],
168
- scores: np.ndarray,
169
- threshold: float,
170
- ) -> List[int]:
171
- """Apply greedy NMS using Object Keypoint Similarity (OKS).
172
-
173
- Args:
174
- points_list: List of keypoint arrays, each of shape (n_nodes, 2).
175
- scores: Confidence scores of shape (N,).
176
- threshold: OKS threshold for suppression.
177
-
178
- Returns:
179
- List of indices to keep, in order of decreasing score.
180
- """
181
- if len(points_list) == 0:
182
- return []
183
-
184
- # Sort by score descending
185
- order = scores.argsort()[::-1].tolist()
186
-
187
- keep = []
188
- while order:
189
- # Take highest scoring remaining instance
190
- i = order.pop(0)
191
- keep.append(i)
192
-
193
- if not order:
194
- break
195
-
196
- # Compute OKS with all remaining instances
197
- similarities = np.array(
198
- [_compute_oks(points_list[i], points_list[j]) for j in order]
199
- )
200
-
201
- # Keep only instances with similarity <= threshold
202
- mask = similarities <= threshold
203
- order = [order[j] for j in range(len(order)) if mask[j]]
204
-
205
- return keep
206
-
207
-
208
- def _compute_iou_one_to_many(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
209
- """Compute IOU between one box and multiple boxes.
210
-
211
- Args:
212
- box: Single box of shape (4,) as [xmin, ymin, xmax, ymax].
213
- boxes: Multiple boxes of shape (N, 4).
214
-
215
- Returns:
216
- IOU values of shape (N,).
217
- """
218
- # Intersection coordinates
219
- inter_xmin = np.maximum(box[0], boxes[:, 0])
220
- inter_ymin = np.maximum(box[1], boxes[:, 1])
221
- inter_xmax = np.minimum(box[2], boxes[:, 2])
222
- inter_ymax = np.minimum(box[3], boxes[:, 3])
223
-
224
- # Intersection area (0 if no overlap)
225
- inter_w = np.maximum(0.0, inter_xmax - inter_xmin)
226
- inter_h = np.maximum(0.0, inter_ymax - inter_ymin)
227
- inter_area = inter_w * inter_h
228
-
229
- # Individual areas
230
- area_a = (box[2] - box[0]) * (box[3] - box[1])
231
- area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
232
-
233
- # Union area
234
- union_area = area_a + area_b - inter_area
235
-
236
- # IOU (avoid division by zero)
237
- return np.where(union_area > 0, inter_area / union_area, 0.0)
238
-
239
-
240
- def _compute_oks(
241
- points_a: np.ndarray,
242
- points_b: np.ndarray,
243
- kappa: float = 0.1,
244
- ) -> float:
245
- """Compute Object Keypoint Similarity (OKS) between two instances.
246
-
247
- Uses a simplified OKS formula where all keypoints have equal weight
248
- and scale is derived from the bounding box of the reference instance.
249
-
250
- Args:
251
- points_a: Keypoints of first instance, shape (n_nodes, 2).
252
- points_b: Keypoints of second instance, shape (n_nodes, 2).
253
- kappa: Per-keypoint constant controlling falloff. Default 0.1.
254
-
255
- Returns:
256
- OKS value in [0, 1]. Higher means more similar.
257
- """
258
- # Find valid keypoints (present in both instances)
259
- valid_a = ~np.isnan(points_a).any(axis=1)
260
- valid_b = ~np.isnan(points_b).any(axis=1)
261
- valid = valid_a & valid_b
262
-
263
- if not valid.any():
264
- return 0.0
265
-
266
- # Compute scale from bounding box area of instance A
267
- pts_a_valid = points_a[valid_a]
268
- if len(pts_a_valid) < 2:
269
- return 0.0
270
-
271
- bbox_w = pts_a_valid[:, 0].max() - pts_a_valid[:, 0].min()
272
- bbox_h = pts_a_valid[:, 1].max() - pts_a_valid[:, 1].min()
273
- scale_sq = bbox_w * bbox_h
274
-
275
- if scale_sq <= 0:
276
- return 0.0
277
-
278
- # Compute squared distances for valid keypoints
279
- d_sq = np.sum((points_a[valid] - points_b[valid]) ** 2, axis=1)
280
-
281
- # OKS formula: mean of exp(-d^2 / (2 * s^2 * k^2))
282
- oks_per_kpt = np.exp(-d_sq / (2 * scale_sq * kappa**2))
283
-
284
- return float(np.mean(oks_per_kpt))