singlebehaviorlab 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. sam2/__init__.py +11 -0
  2. sam2/automatic_mask_generator.py +454 -0
  3. sam2/benchmark.py +92 -0
  4. sam2/build_sam.py +174 -0
  5. sam2/configs/sam2/sam2_hiera_b+.yaml +113 -0
  6. sam2/configs/sam2/sam2_hiera_l.yaml +117 -0
  7. sam2/configs/sam2/sam2_hiera_s.yaml +116 -0
  8. sam2/configs/sam2/sam2_hiera_t.yaml +118 -0
  9. sam2/configs/sam2.1/sam2.1_hiera_b+.yaml +116 -0
  10. sam2/configs/sam2.1/sam2.1_hiera_l.yaml +120 -0
  11. sam2/configs/sam2.1/sam2.1_hiera_s.yaml +119 -0
  12. sam2/configs/sam2.1/sam2.1_hiera_t.yaml +121 -0
  13. sam2/configs/sam2.1_training/sam2.1_hiera_b+_MOSE_finetune.yaml +339 -0
  14. sam2/modeling/__init__.py +5 -0
  15. sam2/modeling/backbones/__init__.py +5 -0
  16. sam2/modeling/backbones/hieradet.py +317 -0
  17. sam2/modeling/backbones/image_encoder.py +134 -0
  18. sam2/modeling/backbones/utils.py +93 -0
  19. sam2/modeling/memory_attention.py +169 -0
  20. sam2/modeling/memory_encoder.py +181 -0
  21. sam2/modeling/position_encoding.py +239 -0
  22. sam2/modeling/sam/__init__.py +5 -0
  23. sam2/modeling/sam/mask_decoder.py +295 -0
  24. sam2/modeling/sam/prompt_encoder.py +202 -0
  25. sam2/modeling/sam/transformer.py +311 -0
  26. sam2/modeling/sam2_base.py +913 -0
  27. sam2/modeling/sam2_utils.py +323 -0
  28. sam2/sam2_hiera_b+.yaml +113 -0
  29. sam2/sam2_hiera_l.yaml +117 -0
  30. sam2/sam2_hiera_s.yaml +116 -0
  31. sam2/sam2_hiera_t.yaml +118 -0
  32. sam2/sam2_image_predictor.py +466 -0
  33. sam2/sam2_video_predictor.py +1388 -0
  34. sam2/sam2_video_predictor_legacy.py +1172 -0
  35. sam2/utils/__init__.py +5 -0
  36. sam2/utils/amg.py +348 -0
  37. sam2/utils/misc.py +349 -0
  38. sam2/utils/transforms.py +118 -0
  39. singlebehaviorlab/__init__.py +4 -0
  40. singlebehaviorlab/__main__.py +130 -0
  41. singlebehaviorlab/_paths.py +100 -0
  42. singlebehaviorlab/backend/__init__.py +2 -0
  43. singlebehaviorlab/backend/augmentations.py +320 -0
  44. singlebehaviorlab/backend/data_store.py +420 -0
  45. singlebehaviorlab/backend/model.py +1290 -0
  46. singlebehaviorlab/backend/train.py +4667 -0
  47. singlebehaviorlab/backend/uncertainty.py +578 -0
  48. singlebehaviorlab/backend/video_processor.py +688 -0
  49. singlebehaviorlab/backend/video_utils.py +139 -0
  50. singlebehaviorlab/data/config/config.yaml +85 -0
  51. singlebehaviorlab/data/training_profiles.json +334 -0
  52. singlebehaviorlab/gui/__init__.py +4 -0
  53. singlebehaviorlab/gui/analysis_widget.py +2291 -0
  54. singlebehaviorlab/gui/attention_export.py +311 -0
  55. singlebehaviorlab/gui/clip_extraction_widget.py +481 -0
  56. singlebehaviorlab/gui/clustering_widget.py +3187 -0
  57. singlebehaviorlab/gui/inference_popups.py +1138 -0
  58. singlebehaviorlab/gui/inference_widget.py +4550 -0
  59. singlebehaviorlab/gui/inference_worker.py +651 -0
  60. singlebehaviorlab/gui/labeling_widget.py +2324 -0
  61. singlebehaviorlab/gui/main_window.py +754 -0
  62. singlebehaviorlab/gui/metadata_management_widget.py +1119 -0
  63. singlebehaviorlab/gui/motion_tracking.py +764 -0
  64. singlebehaviorlab/gui/overlay_export.py +1234 -0
  65. singlebehaviorlab/gui/plot_integration.py +729 -0
  66. singlebehaviorlab/gui/qt_helpers.py +29 -0
  67. singlebehaviorlab/gui/registration_widget.py +1485 -0
  68. singlebehaviorlab/gui/review_widget.py +1330 -0
  69. singlebehaviorlab/gui/segmentation_tracking_widget.py +2752 -0
  70. singlebehaviorlab/gui/tab_tutorial_dialog.py +312 -0
  71. singlebehaviorlab/gui/timeline_themes.py +131 -0
  72. singlebehaviorlab/gui/training_profiles.py +418 -0
  73. singlebehaviorlab/gui/training_widget.py +3719 -0
  74. singlebehaviorlab/gui/video_utils.py +233 -0
  75. singlebehaviorlab/licenses/SAM2-LICENSE +201 -0
  76. singlebehaviorlab/licenses/VideoPrism-LICENSE +202 -0
  77. singlebehaviorlab-2.0.0.dist-info/METADATA +447 -0
  78. singlebehaviorlab-2.0.0.dist-info/RECORD +88 -0
  79. singlebehaviorlab-2.0.0.dist-info/WHEEL +5 -0
  80. singlebehaviorlab-2.0.0.dist-info/entry_points.txt +2 -0
  81. singlebehaviorlab-2.0.0.dist-info/licenses/LICENSE +21 -0
  82. singlebehaviorlab-2.0.0.dist-info/top_level.txt +3 -0
  83. videoprism/__init__.py +0 -0
  84. videoprism/encoders.py +910 -0
  85. videoprism/layers.py +1136 -0
  86. videoprism/models.py +407 -0
  87. videoprism/tokenizers.py +167 -0
  88. videoprism/utils.py +168 -0
@@ -0,0 +1,578 @@
1
+ """
2
+ Uncertainty computation for active learning.
3
+
4
+ Ranks inference clips by model uncertainty per class to surface candidate
5
+ examples for human review and training-set enrichment.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import numpy as np
11
+
12
+
13
+ def _compute_clip_uncertainty(probs: list, is_ovr: bool = False) -> dict:
14
+ """
15
+ Compute uncertainty metrics for a single clip.
16
+
17
+ Returns:
18
+ margin – top_score minus second_score (lower = more uncertain)
19
+ entropy – Shannon entropy over the probability vector
20
+ top_class_idx – index of the highest-scoring class
21
+ top_score – raw score of the top class
22
+ second_score – raw score of the runner-up
23
+ """
24
+ arr = np.array(probs, dtype=np.float64)
25
+ if arr.size == 0:
26
+ return {"margin": 1.0, "entropy": 0.0, "top_class_idx": 0,
27
+ "top_score": 0.0, "second_score": 0.0}
28
+
29
+ sorted_idx = np.argsort(arr)[::-1]
30
+ top_score = float(arr[sorted_idx[0]])
31
+ second_score = float(arr[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
32
+ margin = top_score - second_score
33
+
34
+ # Normalise for entropy (handles both softmax and independent-sigmoid outputs)
35
+ arr_norm = arr / (arr.sum() + 1e-8)
36
+ arr_clip = np.clip(arr_norm, 1e-10, 1.0)
37
+ entropy = float(-np.sum(arr_clip * np.log(arr_clip)))
38
+
39
+ return {
40
+ "margin": float(margin),
41
+ "entropy": float(entropy),
42
+ "top_class_idx": int(sorted_idx[0]),
43
+ "top_score": float(top_score),
44
+ "second_score": float(second_score),
45
+ }
46
+
47
+
48
def _build_clip_entries(results: dict, classes: list, is_ovr: bool = False) -> list:
    """
    Build a flat list of clip candidates from inference results.

    Args:
        results: Inference result dict (video_path -> result_entry); each
            entry may carry "clip_probabilities", "clip_starts" and
            "frame_interval".
        classes: Ordered list of class names.
        is_ovr: Forwarded to the per-clip uncertainty computation.

    Returns:
        List of plain dicts, one per scored clip, holding the clip location
        (video, clip_idx, start_frame, frame_interval), a per-class score
        map, and the uncertainty metrics (margin, entropy, top_score).
    """
    all_entries = []
    for video_path, res in results.items():
        clip_probs = res.get("clip_probabilities") or []
        clip_starts = res.get("clip_starts") or []
        frame_interval = int(res.get("frame_interval") or 1)
        for clip_idx, probs in enumerate(clip_probs):
            # Explicit None/length check instead of `not probs`: numpy arrays
            # raise "truth value is ambiguous" under boolean coercion, and
            # per-clip probabilities may arrive as arrays rather than lists.
            if probs is None or len(probs) == 0:
                continue
            u = _compute_clip_uncertainty(probs, is_ovr=is_ovr)
            top_idx = u["top_class_idx"]
            # clip_starts may be shorter than clip_probs; default to frame 0.
            start_frame = int(clip_starts[clip_idx]) if clip_idx < len(clip_starts) else 0
            scores = {
                classes[i]: float(probs[i])
                for i in range(min(len(probs), len(classes)))
            }
            all_entries.append({
                "video": video_path,
                "clip_idx": clip_idx,
                "start_frame": start_frame,
                "frame_interval": frame_interval,
                "predicted_class": classes[top_idx] if top_idx < len(classes) else "Unknown",
                "predicted_class_idx": top_idx,
                "scores": scores,
                "margin": u["margin"],
                "entropy": u["entropy"],
                "top_score": u["top_score"],
            })
    return all_entries
78
+
79
+
80
+ def _build_center_merge_weights(length: int) -> np.ndarray:
81
+ if length <= 1:
82
+ return np.ones((max(1, length),), dtype=np.float32)
83
+ w = np.hanning(length).astype(np.float32)
84
+ if not np.any(w > 0):
85
+ return np.ones((length,), dtype=np.float32)
86
+ return np.clip(0.1 + 0.9 * w, 1e-3, None).astype(np.float32)
87
+
88
+
89
def _get_frame_scores_for_result(res: dict, classes: list, is_ovr: bool = False) -> np.ndarray | None:
    """
    Return a per-frame class-score matrix for one video's inference result.

    Prefers a correctly-shaped precomputed "aggregated_frame_probs" matrix
    when present; otherwise rebuilds frame-level scores by spreading each
    clip's per-frame probabilities over the original frames it covers,
    weighted by a centre-heavy (Hann-based) merge window so overlapping
    clips blend smoothly.

    Args:
        res: Result entry for one video. Keys read: "aggregated_frame_probs",
            "clip_frame_probabilities", "clip_starts", "total_frames",
            "frame_interval".
        classes: Ordered class names; fixes the expected column count.
        is_ovr: When False, covered rows are re-normalised to sum to 1
            (softmax-style); when True scores stay as independent sigmoids.

    Returns:
        float32 array of shape (frames, len(classes)) trimmed after the last
        covered frame, or None if no usable scores are available.
    """
    # Fast path: precomputed matrix, as ndarray or nested list.
    scores = res.get("aggregated_frame_probs")
    if isinstance(scores, np.ndarray) and scores.ndim == 2 and scores.shape[1] == len(classes):
        return scores.astype(np.float32, copy=False)
    if isinstance(scores, list) and scores:
        arr = np.asarray(scores, dtype=np.float32)
        if arr.ndim == 2 and arr.shape[1] == len(classes):
            return arr

    # Slow path: rebuild from per-clip frame probabilities.
    clip_frame_probabilities = res.get("clip_frame_probabilities") or []
    clip_starts = res.get("clip_starts") or []
    total_frames = int(res.get("total_frames") or 0)
    frame_interval = max(1, int(res.get("frame_interval") or 1))
    if not clip_frame_probabilities or not clip_starts or total_frames <= 0:
        return None

    num_classes = len(classes)
    # Weighted accumulators: probs * weight, and total merge weight per frame.
    agg_probs = np.zeros((total_frames, num_classes), dtype=np.float32)
    agg_counts = np.zeros((total_frames, 1), dtype=np.float32)
    for i, probs in enumerate(clip_frame_probabilities):
        if probs is None or i >= len(clip_starts):
            continue
        probs_arr = np.clip(np.asarray(probs, dtype=np.float32), 0.0, None)
        if probs_arr.ndim != 2 or probs_arr.shape[1] != num_classes:
            # Skip malformed clips rather than failing the whole video.
            continue
        merge_w = _build_center_merge_weights(int(probs_arr.shape[0]))
        start_f = int(clip_starts[i])
        for t in range(int(probs_arr.shape[0])):
            # Each sampled clip frame t covers frame_interval original frames.
            f_start = start_f + t * frame_interval
            f_end = min(f_start + frame_interval, total_frames)
            if f_start >= total_frames:
                break
            if f_end <= f_start:
                continue
            w = float(merge_w[t])
            agg_probs[f_start:f_end] += probs_arr[t][np.newaxis, :] * w
            agg_counts[f_start:f_end] += w
    covered = agg_counts.squeeze(-1) > 0
    if not np.any(covered):
        return None
    # NOTE(review): dividing by max(count, 1.0) under-weights frames whose
    # total merge weight is below 1 rather than averaging them — presumably
    # deliberate damping of sparsely-covered frames; confirm with authors.
    agg_probs = agg_probs / np.maximum(agg_counts, 1.0)
    if not is_ovr:
        # Softmax-style outputs: re-normalise covered rows to a distribution.
        row_sums = agg_probs[covered].sum(axis=1, keepdims=True)
        agg_probs[covered] = agg_probs[covered] / np.maximum(row_sums, 1e-8)
    # Trim trailing frames no clip covered.
    last_covered = int(np.max(np.where(covered))) + 1
    return agg_probs[:last_covered]
135
+
136
+
137
def rank_clips_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 20,
    is_ovr: bool = False,
    ovr_boundary: tuple = (0.25, 0.75),
    max_margin_softmax: float = 0.45,
) -> dict:
    """
    For each class find the N most uncertain clips from inference results.

    'Uncertain about class C' means one of:
      - OvR: C's sigmoid score is in the ambiguous range [ovr_boundary]
      - OvR/softmax: C was the top prediction but the margin against the
        runner-up is below max_margin_softmax
      - softmax: C has non-trivial probability but is not the top prediction
        (potential false-negative)

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered list of class names.
        n_per_class: How many clips to surface per class.
        is_ovr: Whether the model uses OvR (independent sigmoids).
        ovr_boundary: (low, high) sigmoid range considered ambiguous.
        max_margin_softmax: Clips where top − second < this are considered uncertain.

    Returns:
        Dict {class_name: [entry, ...]} where each entry is a plain dict.
    """
    boundary_lo, boundary_hi = ovr_boundary

    entries = _build_clip_entries(results, classes, is_ovr=is_ovr)
    if not entries:
        return {name: [] for name in classes}

    for entry in entries:
        entry["review_kind"] = "uncertain"

    def assess(entry: dict, class_name: str):
        """Return (uncertainty, class_score) if the clip is uncertain about class_name, else None."""
        class_score = entry["scores"].get(class_name, 0.0)
        predicted_here = entry["predicted_class"] == class_name
        shaky_top = predicted_here and entry["margin"] < max_margin_softmax
        if is_ovr:
            if shaky_top or boundary_lo <= class_score <= boundary_hi:
                # Uncertainty ∝ closeness to 0.5 for the OvR sigmoid score.
                return 1.0 - abs(class_score - 0.5) * 2.0, class_score
            return None
        if shaky_top:
            return 1.0 - entry["margin"], class_score
        if not predicted_here and class_score > 0.20:
            # Non-top class with non-trivial mass: potential false negative.
            return entry["entropy"], class_score
        return None

    ranked = {}
    for class_name in classes:
        candidates = []
        for entry in entries:
            verdict = assess(entry, class_name)
            if verdict is not None:
                candidates.append((verdict[0], verdict[1], entry))

        # Highest uncertainty first (stable sort keeps input order on ties),
        # then keep only the first occurrence of each (video, clip) pair.
        candidates.sort(key=lambda item: item[0], reverse=True)
        seen_keys = set()
        unique = []
        for u_score, class_score, entry in candidates:
            clip_key = (entry["video"], entry["clip_idx"])
            if clip_key in seen_keys:
                continue
            seen_keys.add(clip_key)
            cleaned = dict(entry)
            cleaned["uncertainty_score"] = float(u_score)
            cleaned["class_score"] = float(class_score)
            unique.append(cleaned)

        ranked[class_name] = unique[:n_per_class]

    return ranked
213
+
214
+
215
def rank_clips_per_video_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 20,
    is_ovr: bool = False,
    ovr_boundary: tuple = (0.25, 0.75),
    max_margin_softmax: float = 0.45,
) -> dict:
    """
    Rank uncertain clips per class within each video.

    Runs rank_clips_for_review on every video independently so each video
    receives its own per-class quota.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, res in results.items():
        single_video = rank_clips_for_review(
            {video_path: res},
            classes,
            n_per_class=n_per_class,
            is_ovr=is_ovr,
            ovr_boundary=ovr_boundary,
            max_margin_softmax=max_margin_softmax,
        )
        for name in classes:
            per_class[name][video_path] = list(single_video.get(name) or [])
    return per_class
237
+
238
+
239
def rank_transition_clips_for_review(
    results: dict,
    classes: list,
    clip_length: int,
    is_ovr: bool = False,
    n_per_class: int = 50,
) -> dict:
    """
    Mine transition-window candidates from precise frame timeline outputs.

    For every pair of adjacent, differently-labelled segments in a video's
    frame timeline, proposes a review clip centred on the class boundary
    and scores it by how confident both sides are and how ambiguous the
    frames around the boundary are.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered list of class names.
        clip_length: Number of sampled frames per proposed review clip.
        is_ovr: Forwarded to the frame-score reconstruction.
        n_per_class: Maximum entries kept per class.

    Returns:
        Dict {class_name: [entry, ...]}; each boundary entry appears under
        both its left and right class, sorted by transition_score descending.
    """
    ranked = {class_name: [] for class_name in classes}
    if clip_length <= 0 or not classes:
        return ranked

    for video_path, res in results.items():
        frame_scores = _get_frame_scores_for_result(res, classes, is_ovr=is_ovr)
        if frame_scores is None or frame_scores.shape[0] < 2:
            continue
        total_frames = int(res.get("total_frames") or frame_scores.shape[0])
        frame_interval = max(1, int(res.get("frame_interval") or 1))
        top_idx = np.argmax(frame_scores, axis=1)
        segments = list(res.get("aggregated_segments") or [])
        if len(segments) < 2:
            # Fallback: derive segments as runs of constant argmax class.
            # NOTE(review): when exactly one aggregated segment exists, the
            # run-derived segments are appended after it, so the zip below
            # may pair the precomputed segment with a derived one — confirm
            # this mixing is intended.
            run_start = 0
            for fi in range(1, frame_scores.shape[0] + 1):
                at_end = fi >= frame_scores.shape[0]
                if at_end or int(top_idx[fi]) != int(top_idx[run_start]):
                    cls_idx = int(top_idx[run_start])
                    if 0 <= cls_idx < len(classes):
                        seg_end = fi - 1
                        segments.append({
                            "class": cls_idx,
                            "start": int(run_start),
                            "end": int(seg_end),
                            "confidence": float(np.mean(frame_scores[run_start:seg_end + 1, cls_idx])),
                        })
                    run_start = fi
        if len(segments) < 2:
            continue
        # Per-frame margin between the best and runner-up class scores.
        sorted_scores = np.sort(frame_scores, axis=1)
        if sorted_scores.shape[1] >= 2:
            margins = sorted_scores[:, -1] - sorted_scores[:, -2]
        else:
            margins = sorted_scores[:, -1]
        clip_span_frames = max(frame_interval, int(clip_length) * frame_interval)
        half_span = max(frame_interval, (int(clip_length) // 2) * frame_interval)
        max_start = max(0, total_frames - clip_span_frames)
        seen_keys = set()

        # Walk adjacent segment pairs; each class change is a candidate boundary.
        for left_seg, right_seg in zip(segments[:-1], segments[1:]):
            left_cls = int(left_seg.get("class", -1))
            right_cls = int(right_seg.get("class", -1))
            if left_cls == right_cls or left_cls < 0 or right_cls < 0:
                continue
            if left_cls >= len(classes) or right_cls >= len(classes):
                continue
            boundary_frame = int(right_seg.get("start", left_seg.get("end", 0) + 1))
            if boundary_frame <= 0 or boundary_frame >= frame_scores.shape[0]:
                continue
            # Centre the proposed clip on the boundary, clamped to the video.
            desired_start = boundary_frame - half_span
            start_frame = max(0, min(max_start, desired_start))
            sampled_frames = [
                min(frame_scores.shape[0] - 1, start_frame + t * frame_interval)
                for t in range(int(clip_length))
            ]
            if not sampled_frames:
                continue
            proposed_frame_labels = [classes[int(top_idx[f])] for f in sampled_frames]
            # Mean margin in a small window around the boundary (low = ambiguous).
            center_lo = max(0, boundary_frame - frame_interval)
            center_hi = min(frame_scores.shape[0], boundary_frame + frame_interval + 1)
            center_margin = float(np.mean(margins[center_lo:center_hi])) if center_hi > center_lo else float(margins[boundary_frame])
            left_conf = float(left_seg.get("confidence", frame_scores[max(0, boundary_frame - 1), left_cls]))
            right_conf = float(right_seg.get("confidence", frame_scores[min(frame_scores.shape[0] - 1, boundary_frame), right_cls]))
            center_scores = frame_scores[sampled_frames[len(sampled_frames) // 2]]
            # High when both sides are confident and the boundary is ambiguous;
            # clamped into [0, 1].
            transition_score = max(0.0, min(1.0, 0.5 * (left_conf + right_conf) + (1.0 - max(0.0, center_margin))))
            key = (video_path, start_frame, left_cls, right_cls)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            entry = {
                "review_kind": "transition",
                "video": video_path,
                "clip_idx": int(boundary_frame),
                "start_frame": int(start_frame),
                "frame_interval": int(frame_interval),
                "transition_frame": int(boundary_frame),
                "left_class": classes[left_cls],
                "right_class": classes[right_cls],
                "predicted_class": f"{classes[left_cls]} -> {classes[right_cls]}",
                "scores": {
                    classes[i]: float(center_scores[i])
                    for i in range(min(len(classes), center_scores.shape[0]))
                },
                "top_score": float(np.max(center_scores)),
                "transition_score": float(transition_score),
                "proposed_frame_labels": proposed_frame_labels,
            }
            # A boundary is relevant to both of its classes.
            ranked[classes[left_cls]].append(entry)
            ranked[classes[right_cls]].append(entry)

    # Sort by transition_score and deduplicate identical boundaries per class.
    deduped = {}
    for class_name, entries in ranked.items():
        seen = set()
        unique = []
        for entry in sorted(entries, key=lambda e: float(e.get("transition_score", 0.0)), reverse=True):
            key = (entry.get("video", ""), int(entry.get("start_frame", -1)), entry.get("left_class", ""), entry.get("right_class", ""))
            if key in seen:
                continue
            seen.add(key)
            unique.append(entry)
        deduped[class_name] = unique[:n_per_class]
    return deduped
349
+
350
+
351
def rank_transition_clips_per_video_for_review(
    results: dict,
    classes: list,
    clip_length: int,
    is_ovr: bool = False,
    n_per_class: int = 50,
) -> dict:
    """
    Mine transition-window candidates per class within each video.

    Runs rank_transition_clips_for_review on every video independently so
    each video receives its own per-class quota.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, res in results.items():
        single_video = rank_transition_clips_for_review(
            {video_path: res},
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=n_per_class,
        )
        for name in classes:
            per_class[name][video_path] = list(single_video.get(name) or [])
    return per_class
370
+
371
+
372
def rank_confident_clips_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 100,
    is_ovr: bool = False,
    clip_length: int = 8,
    min_gap_multiplier: float = 0.75,
) -> dict:
    """
    Rank top confident clips with lightweight diversity across videos/time.

    The selection is confidence-first, but uses round-robin across videos and
    avoids choosing clips that are too close in time within the same video.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered list of class names.
        n_per_class: Maximum entries returned per class.
        is_ovr: Forwarded to clip-entry construction.
        clip_length: Clip length in sampled frames; sets the temporal gap unit.
        min_gap_multiplier: Fraction of clip_length two same-video picks must
            be separated by (floored at 0.1).

    Returns:
        Dict {class_name: [entry, ...]} with entries ranked for confidence.
    """
    all_entries = _build_clip_entries(results, classes, is_ovr=is_ovr)
    if not all_entries or n_per_class <= 0:
        return {class_name: [] for class_name in classes}

    for entry in all_entries:
        entry["review_kind"] = "confident"
        entry["confidence_score"] = float(entry.get("top_score", 0.0))

    # Minimum temporal spacing (in sampled-frame units) between two picks
    # from the same video.
    min_gap_frames = max(
        1,
        int(round(max(1, int(clip_length)) * max(0.1, float(min_gap_multiplier))))
    )

    def is_far_enough(candidate: dict, selected: list) -> bool:
        # True when the candidate is at least gap_frames (in original-frame
        # units, scaled by the candidate's frame_interval) away from every
        # already-selected clip of the same video.
        cand_video = candidate.get("video", "")
        cand_start = int(candidate.get("start_frame", 0))
        cand_interval = max(1, int(candidate.get("frame_interval", 1)))
        gap_frames = int(round(min_gap_frames * cand_interval))
        for prev in selected:
            if prev.get("video", "") != cand_video:
                continue
            if abs(cand_start - int(prev.get("start_frame", 0))) < gap_frames:
                return False
        return True

    def select_diverse_entries(entries: list, limit: int) -> list:
        # Pick up to `limit` entries: confidence-first, round-robin across
        # videos, skipping temporally-close clips; fill any shortfall from
        # the global confidence order afterwards.
        entries = sorted(
            entries,
            key=lambda e: (
                -float(e.get("confidence_score", 0.0)),
                os.path.basename(e.get("video", "")),
                int(e.get("start_frame", 0)),
            )
        )

        # Bucket by video, preserving per-video confidence order.
        by_video = {}
        for entry in entries:
            by_video.setdefault(entry.get("video", ""), []).append(entry)

        # Visit videos in order of their single best entry.
        ordered_videos = sorted(
            by_video.keys(),
            key=lambda video: (
                -float(by_video[video][0].get("confidence_score", 0.0)),
                os.path.basename(video),
            )
        )

        selected = []
        selected_keys = set()
        # Next unexamined index within each video's candidate list.
        positions = {video: 0 for video in ordered_videos}

        made_progress = True
        while len(selected) < limit and made_progress:
            made_progress = False
            for video in ordered_videos:
                candidates = by_video[video]
                pos = positions[video]
                while pos < len(candidates):
                    entry = candidates[pos]
                    pos += 1
                    key = (entry.get("video", ""), int(entry.get("clip_idx", -1)))
                    if key in selected_keys:
                        continue
                    if not is_far_enough(entry, selected):
                        continue
                    selected.append(entry)
                    selected_keys.add(key)
                    positions[video] = pos
                    made_progress = True
                    break
                else:
                    # Video exhausted without a pick this round.
                    positions[video] = pos

                if len(selected) >= limit:
                    break

        # Shortfall: relax the spacing constraint and take the next most
        # confident unused entries.
        if len(selected) < limit:
            for entry in entries:
                key = (entry.get("video", ""), int(entry.get("clip_idx", -1)))
                if key in selected_keys:
                    continue
                selected.append(entry)
                selected_keys.add(key)
                if len(selected) >= limit:
                    break

        return selected[:limit]

    ranked = {}
    for class_name in classes:
        class_entries = []
        for entry in all_entries:
            # Re-key every clip on this class's own score so each class
            # ranks all clips, not only those it topped.
            class_score = float(entry.get("scores", {}).get(class_name, 0.0))
            class_entries.append({
                **entry,
                "class_score": class_score,
                "confidence_score": class_score,
                "predicted_class": class_name,
            })
        ranked[class_name] = select_diverse_entries(class_entries, n_per_class)

    return ranked
488
+
489
+
490
def rank_confident_clips_per_video_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 100,
    is_ovr: bool = False,
    clip_length: int = 8,
    min_gap_multiplier: float = 0.75,
) -> dict:
    """
    Rank top confident clips per class within each video.

    Runs rank_confident_clips_for_review on every video independently so
    each video receives its own per-class quota.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, res in results.items():
        single_video = rank_confident_clips_for_review(
            {video_path: res},
            classes,
            n_per_class=n_per_class,
            is_ovr=is_ovr,
            clip_length=clip_length,
            min_gap_multiplier=min_gap_multiplier,
        )
        for name in classes:
            per_class[name][video_path] = list(single_video.get(name) or [])
    return per_class
516
+
517
+
518
def save_uncertainty_report(
    results: dict,
    classes: list,
    output_path: str,
    is_ovr: bool = False,
    n_per_class: int = 25,
    clip_length: int = 8,
    target_fps: int = 16,
) -> dict:
    """
    Compute every review ranking and persist the combined report as JSON.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered list of class names.
        output_path: Destination JSON file; parent directories are created.
        is_ovr: Whether the model uses independent sigmoids (OvR).
        n_per_class: Base per-class quota for the uncertainty rankings.
        clip_length: Clip length in sampled frames (transition/confident mining).
        target_fps: Recorded verbatim in the report for downstream consumers.

    Returns:
        The report dict that was written to disk.
    """
    # Transition and confident mining use larger floors than the base quota.
    transition_quota = max(50, n_per_class)
    confident_quota = max(100, n_per_class)

    report = {
        "classes": classes,
        "is_ovr": is_ovr,
        "n_per_class": n_per_class,
        "clip_length": clip_length,
        "target_fps": target_fps,
        "per_class": rank_clips_for_review(
            results, classes, n_per_class=n_per_class, is_ovr=is_ovr
        ),
        "per_class_per_video": rank_clips_per_video_for_review(
            results, classes, n_per_class=n_per_class, is_ovr=is_ovr
        ),
        "transition_per_class": rank_transition_clips_for_review(
            results,
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=transition_quota,
        ),
        "transition_per_class_per_video": rank_transition_clips_per_video_for_review(
            results,
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=transition_quota,
        ),
        "confident_per_class": rank_confident_clips_for_review(
            results,
            classes,
            n_per_class=confident_quota,
            is_ovr=is_ovr,
            clip_length=clip_length,
        ),
        "confident_per_class_per_video": rank_confident_clips_per_video_for_review(
            results,
            classes,
            n_per_class=confident_quota,
            is_ovr=is_ovr,
            clip_length=clip_length,
        ),
    }

    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(report, handle, indent=2)
    return report