singlebehaviorlab 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sam2/__init__.py +11 -0
- sam2/automatic_mask_generator.py +454 -0
- sam2/benchmark.py +92 -0
- sam2/build_sam.py +174 -0
- sam2/configs/sam2/sam2_hiera_b+.yaml +113 -0
- sam2/configs/sam2/sam2_hiera_l.yaml +117 -0
- sam2/configs/sam2/sam2_hiera_s.yaml +116 -0
- sam2/configs/sam2/sam2_hiera_t.yaml +118 -0
- sam2/configs/sam2.1/sam2.1_hiera_b+.yaml +116 -0
- sam2/configs/sam2.1/sam2.1_hiera_l.yaml +120 -0
- sam2/configs/sam2.1/sam2.1_hiera_s.yaml +119 -0
- sam2/configs/sam2.1/sam2.1_hiera_t.yaml +121 -0
- sam2/configs/sam2.1_training/sam2.1_hiera_b+_MOSE_finetune.yaml +339 -0
- sam2/modeling/__init__.py +5 -0
- sam2/modeling/backbones/__init__.py +5 -0
- sam2/modeling/backbones/hieradet.py +317 -0
- sam2/modeling/backbones/image_encoder.py +134 -0
- sam2/modeling/backbones/utils.py +93 -0
- sam2/modeling/memory_attention.py +169 -0
- sam2/modeling/memory_encoder.py +181 -0
- sam2/modeling/position_encoding.py +239 -0
- sam2/modeling/sam/__init__.py +5 -0
- sam2/modeling/sam/mask_decoder.py +295 -0
- sam2/modeling/sam/prompt_encoder.py +202 -0
- sam2/modeling/sam/transformer.py +311 -0
- sam2/modeling/sam2_base.py +913 -0
- sam2/modeling/sam2_utils.py +323 -0
- sam2/sam2_hiera_b+.yaml +113 -0
- sam2/sam2_hiera_l.yaml +117 -0
- sam2/sam2_hiera_s.yaml +116 -0
- sam2/sam2_hiera_t.yaml +118 -0
- sam2/sam2_image_predictor.py +466 -0
- sam2/sam2_video_predictor.py +1388 -0
- sam2/sam2_video_predictor_legacy.py +1172 -0
- sam2/utils/__init__.py +5 -0
- sam2/utils/amg.py +348 -0
- sam2/utils/misc.py +349 -0
- sam2/utils/transforms.py +118 -0
- singlebehaviorlab/__init__.py +4 -0
- singlebehaviorlab/__main__.py +130 -0
- singlebehaviorlab/_paths.py +100 -0
- singlebehaviorlab/backend/__init__.py +2 -0
- singlebehaviorlab/backend/augmentations.py +320 -0
- singlebehaviorlab/backend/data_store.py +420 -0
- singlebehaviorlab/backend/model.py +1290 -0
- singlebehaviorlab/backend/train.py +4667 -0
- singlebehaviorlab/backend/uncertainty.py +578 -0
- singlebehaviorlab/backend/video_processor.py +688 -0
- singlebehaviorlab/backend/video_utils.py +139 -0
- singlebehaviorlab/data/config/config.yaml +85 -0
- singlebehaviorlab/data/training_profiles.json +334 -0
- singlebehaviorlab/gui/__init__.py +4 -0
- singlebehaviorlab/gui/analysis_widget.py +2291 -0
- singlebehaviorlab/gui/attention_export.py +311 -0
- singlebehaviorlab/gui/clip_extraction_widget.py +481 -0
- singlebehaviorlab/gui/clustering_widget.py +3187 -0
- singlebehaviorlab/gui/inference_popups.py +1138 -0
- singlebehaviorlab/gui/inference_widget.py +4550 -0
- singlebehaviorlab/gui/inference_worker.py +651 -0
- singlebehaviorlab/gui/labeling_widget.py +2324 -0
- singlebehaviorlab/gui/main_window.py +754 -0
- singlebehaviorlab/gui/metadata_management_widget.py +1119 -0
- singlebehaviorlab/gui/motion_tracking.py +764 -0
- singlebehaviorlab/gui/overlay_export.py +1234 -0
- singlebehaviorlab/gui/plot_integration.py +729 -0
- singlebehaviorlab/gui/qt_helpers.py +29 -0
- singlebehaviorlab/gui/registration_widget.py +1485 -0
- singlebehaviorlab/gui/review_widget.py +1330 -0
- singlebehaviorlab/gui/segmentation_tracking_widget.py +2752 -0
- singlebehaviorlab/gui/tab_tutorial_dialog.py +312 -0
- singlebehaviorlab/gui/timeline_themes.py +131 -0
- singlebehaviorlab/gui/training_profiles.py +418 -0
- singlebehaviorlab/gui/training_widget.py +3719 -0
- singlebehaviorlab/gui/video_utils.py +233 -0
- singlebehaviorlab/licenses/SAM2-LICENSE +201 -0
- singlebehaviorlab/licenses/VideoPrism-LICENSE +202 -0
- singlebehaviorlab-2.0.0.dist-info/METADATA +447 -0
- singlebehaviorlab-2.0.0.dist-info/RECORD +88 -0
- singlebehaviorlab-2.0.0.dist-info/WHEEL +5 -0
- singlebehaviorlab-2.0.0.dist-info/entry_points.txt +2 -0
- singlebehaviorlab-2.0.0.dist-info/licenses/LICENSE +21 -0
- singlebehaviorlab-2.0.0.dist-info/top_level.txt +3 -0
- videoprism/__init__.py +0 -0
- videoprism/encoders.py +910 -0
- videoprism/layers.py +1136 -0
- videoprism/models.py +407 -0
- videoprism/tokenizers.py +167 -0
- videoprism/utils.py +168 -0
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Uncertainty computation for active learning.
|
|
3
|
+
|
|
4
|
+
Ranks inference clips by model uncertainty per class to surface candidate
|
|
5
|
+
examples for human review and training-set enrichment.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _compute_clip_uncertainty(probs: list, is_ovr: bool = False) -> dict:
|
|
14
|
+
"""
|
|
15
|
+
Compute uncertainty metrics for a single clip.
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
margin – top_score minus second_score (lower = more uncertain)
|
|
19
|
+
entropy – Shannon entropy over the probability vector
|
|
20
|
+
top_class_idx – index of the highest-scoring class
|
|
21
|
+
top_score – raw score of the top class
|
|
22
|
+
second_score – raw score of the runner-up
|
|
23
|
+
"""
|
|
24
|
+
arr = np.array(probs, dtype=np.float64)
|
|
25
|
+
if arr.size == 0:
|
|
26
|
+
return {"margin": 1.0, "entropy": 0.0, "top_class_idx": 0,
|
|
27
|
+
"top_score": 0.0, "second_score": 0.0}
|
|
28
|
+
|
|
29
|
+
sorted_idx = np.argsort(arr)[::-1]
|
|
30
|
+
top_score = float(arr[sorted_idx[0]])
|
|
31
|
+
second_score = float(arr[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
|
|
32
|
+
margin = top_score - second_score
|
|
33
|
+
|
|
34
|
+
# Normalise for entropy (handles both softmax and independent-sigmoid outputs)
|
|
35
|
+
arr_norm = arr / (arr.sum() + 1e-8)
|
|
36
|
+
arr_clip = np.clip(arr_norm, 1e-10, 1.0)
|
|
37
|
+
entropy = float(-np.sum(arr_clip * np.log(arr_clip)))
|
|
38
|
+
|
|
39
|
+
return {
|
|
40
|
+
"margin": float(margin),
|
|
41
|
+
"entropy": float(entropy),
|
|
42
|
+
"top_class_idx": int(sorted_idx[0]),
|
|
43
|
+
"top_score": float(top_score),
|
|
44
|
+
"second_score": float(second_score),
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _build_clip_entries(results: dict, classes: list, is_ovr: bool = False) -> list:
    """Flatten inference results into one candidate entry per clip.

    Args:
        results: Mapping of video_path -> inference result dict; each result
            may carry "clip_probabilities", "clip_starts" and "frame_interval".
        classes: Ordered list of class names.
        is_ovr: Forwarded to the per-clip uncertainty computation.

    Returns:
        List of plain dicts (one per clip with non-empty probabilities) with
        video/position info, per-class scores, and uncertainty metrics.
    """
    entries = []
    for video_path, result in results.items():
        probs_per_clip = result.get("clip_probabilities") or []
        starts = result.get("clip_starts") or []
        interval = int(result.get("frame_interval") or 1)
        for idx, clip_probs in enumerate(probs_per_clip):
            if not clip_probs:
                continue
            metrics = _compute_clip_uncertainty(clip_probs, is_ovr=is_ovr)
            winner = metrics["top_class_idx"]
            # Pair scores with class names; truncate to the shorter list so a
            # class-count mismatch never raises.
            named_scores = {
                classes[i]: float(clip_probs[i])
                for i in range(min(len(clip_probs), len(classes)))
            }
            entries.append({
                "video": video_path,
                "clip_idx": idx,
                "start_frame": int(starts[idx]) if idx < len(starts) else 0,
                "frame_interval": interval,
                "predicted_class": classes[winner] if winner < len(classes) else "Unknown",
                "predicted_class_idx": winner,
                "scores": named_scores,
                "margin": metrics["margin"],
                "entropy": metrics["entropy"],
                "top_score": metrics["top_score"],
            })
    return entries
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _build_center_merge_weights(length: int) -> np.ndarray:
|
|
81
|
+
if length <= 1:
|
|
82
|
+
return np.ones((max(1, length),), dtype=np.float32)
|
|
83
|
+
w = np.hanning(length).astype(np.float32)
|
|
84
|
+
if not np.any(w > 0):
|
|
85
|
+
return np.ones((length,), dtype=np.float32)
|
|
86
|
+
return np.clip(0.1 + 0.9 * w, 1e-3, None).astype(np.float32)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _get_frame_scores_for_result(res: dict, classes: list, is_ovr: bool = False) -> np.ndarray | None:
    """Return a per-frame (frames, num_classes) score matrix for one result.

    Prefers a precomputed "aggregated_frame_probs" entry; otherwise rebuilds
    the timeline by merging per-clip frame probabilities with centre-weighted
    averaging. Returns None when no usable frame data exists.

    Args:
        res: One inference result dict (may contain "aggregated_frame_probs",
            "clip_frame_probabilities", "clip_starts", "total_frames",
            "frame_interval").
        classes: Ordered class names; column count must match len(classes).
        is_ovr: If False, rows covered by at least one clip are renormalised
            to sum to 1 (softmax-style); if True, raw sigmoid scores are kept.
    """
    # Fast path: a precomputed matrix of the right shape is used as-is.
    scores = res.get("aggregated_frame_probs")
    if isinstance(scores, np.ndarray) and scores.ndim == 2 and scores.shape[1] == len(classes):
        return scores.astype(np.float32, copy=False)
    if isinstance(scores, list) and scores:
        arr = np.asarray(scores, dtype=np.float32)
        if arr.ndim == 2 and arr.shape[1] == len(classes):
            return arr

    # Slow path: rebuild the timeline from per-clip frame probabilities.
    clip_frame_probabilities = res.get("clip_frame_probabilities") or []
    clip_starts = res.get("clip_starts") or []
    total_frames = int(res.get("total_frames") or 0)
    frame_interval = max(1, int(res.get("frame_interval") or 1))
    if not clip_frame_probabilities or not clip_starts or total_frames <= 0:
        return None

    num_classes = len(classes)
    # Weighted sums and weight totals per original video frame.
    agg_probs = np.zeros((total_frames, num_classes), dtype=np.float32)
    agg_counts = np.zeros((total_frames, 1), dtype=np.float32)
    for i, probs in enumerate(clip_frame_probabilities):
        if probs is None or i >= len(clip_starts):
            continue
        # Clamp negatives; skip clips whose shape doesn't match the classes.
        probs_arr = np.clip(np.asarray(probs, dtype=np.float32), 0.0, None)
        if probs_arr.ndim != 2 or probs_arr.shape[1] != num_classes:
            continue
        # Centre-weighted window so overlapping clips blend smoothly.
        merge_w = _build_center_merge_weights(int(probs_arr.shape[0]))
        start_f = int(clip_starts[i])
        for t in range(int(probs_arr.shape[0])):
            # Each sampled clip frame covers `frame_interval` video frames.
            f_start = start_f + t * frame_interval
            f_end = min(f_start + frame_interval, total_frames)
            if f_start >= total_frames:
                break
            if f_end <= f_start:
                continue
            w = float(merge_w[t])
            agg_probs[f_start:f_end] += probs_arr[t][np.newaxis, :] * w
            agg_counts[f_start:f_end] += w
    covered = agg_counts.squeeze(-1) > 0
    if not np.any(covered):
        return None
    # NOTE(review): dividing by max(count, 1.0) means rows whose total weight
    # is below 1 are down-scaled rather than fully normalised — presumably
    # intentional to avoid amplifying weak edge coverage; confirm.
    agg_probs = agg_probs / np.maximum(agg_counts, 1.0)
    if not is_ovr:
        # Softmax head: renormalise covered rows so each sums to 1.
        row_sums = agg_probs[covered].sum(axis=1, keepdims=True)
        agg_probs[covered] = agg_probs[covered] / np.maximum(row_sums, 1e-8)
    # Trim trailing frames no clip ever covered.
    last_covered = int(np.max(np.where(covered))) + 1
    return agg_probs[:last_covered]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def rank_clips_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 20,
    is_ovr: bool = False,
    ovr_boundary: tuple = (0.25, 0.75),
    max_margin_softmax: float = 0.45,
) -> dict:
    """Surface the N most uncertain clips per class from inference results.

    A clip counts as "uncertain about class C" when one of these holds:
      - OvR: C's sigmoid score falls inside the ambiguous ovr_boundary range;
      - OvR/softmax: C won the prediction but the margin over the runner-up
        is below max_margin_softmax;
      - softmax: C lost the prediction yet still scored > 0.20
        (potential false negative).

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered list of class names.
        n_per_class: Maximum clips returned per class.
        is_ovr: Whether the model uses one-vs-rest (independent sigmoids).
        ovr_boundary: (low, high) sigmoid range treated as ambiguous.
        max_margin_softmax: Margin threshold below which a winner is uncertain.

    Returns:
        Dict {class_name: [entry, ...]}; entries are plain dicts carrying
        "uncertainty_score" and "class_score" in addition to clip metadata.
    """
    boundary_lo, boundary_hi = ovr_boundary

    entries = _build_clip_entries(results, classes, is_ovr=is_ovr)
    if not entries:
        return {name: [] for name in classes}

    for entry in entries:
        entry["review_kind"] = "uncertain"

    ranked = {}
    for class_name in classes:
        candidates = []
        for entry in entries:
            score = entry["scores"].get(class_name, 0.0)
            is_top = entry["predicted_class"] == class_name
            low_margin_winner = is_top and entry["margin"] < max_margin_softmax

            if is_ovr:
                near_boundary = boundary_lo <= score <= boundary_hi
                if near_boundary or low_margin_winner:
                    # Closer to 0.5 => more ambiguous sigmoid output.
                    uncertainty = 1.0 - abs(score - 0.5) * 2.0
                    candidates.append({**entry, "_u": uncertainty, "_cs": score})
            else:
                possible_false_negative = (not is_top) and score > 0.20
                if low_margin_winner or possible_false_negative:
                    uncertainty = (1.0 - entry["margin"]) if low_margin_winner else entry["entropy"]
                    candidates.append({**entry, "_u": uncertainty, "_cs": score})

        # Most-uncertain first (stable sort), dropping duplicate
        # (video, clip_idx) pairs and stripping the private keys.
        picked = []
        used = set()
        for cand in sorted(candidates, key=lambda c: c["_u"], reverse=True):
            ident = (cand["video"], cand["clip_idx"])
            if ident in used:
                continue
            used.add(ident)
            cleaned = {k: v for k, v in cand.items() if not k.startswith("_")}
            cleaned["uncertainty_score"] = float(cand["_u"])
            cleaned["class_score"] = float(cand["_cs"])
            picked.append(cleaned)

        ranked[class_name] = picked[:n_per_class]

    return ranked
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def rank_clips_per_video_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 20,
    is_ovr: bool = False,
    ovr_boundary: tuple = (0.25, 0.75),
    max_margin_softmax: float = 0.45,
) -> dict:
    """Rank uncertain clips per class, keyed by video.

    Applies the same criteria as rank_clips_for_review to each video in
    isolation, so every video contributes its own top-N per class.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, result in results.items():
        single = rank_clips_for_review(
            {video_path: result},
            classes,
            n_per_class=n_per_class,
            is_ovr=is_ovr,
            ovr_boundary=ovr_boundary,
            max_margin_softmax=max_margin_softmax,
        )
        for name in classes:
            per_class[name][video_path] = list(single.get(name, []) or [])
    return per_class
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def rank_transition_clips_for_review(
    results: dict,
    classes: list,
    clip_length: int,
    is_ovr: bool = False,
    n_per_class: int = 50,
) -> dict:
    """Mine transition-window candidates from precise frame timeline outputs.

    Finds class-change boundaries in each video's per-frame score timeline
    and proposes review clips centred on each boundary. Each transition
    entry is credited to both its left and right class.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered class names.
        clip_length: Number of sampled frames per proposed review clip.
        is_ovr: Forwarded to frame-score reconstruction.
        n_per_class: Maximum transition entries returned per class.

    Returns:
        Dict {class_name: [entry, ...]} sorted by transition_score descending.
    """
    ranked = {class_name: [] for class_name in classes}
    if clip_length <= 0 or not classes:
        return ranked

    for video_path, res in results.items():
        frame_scores = _get_frame_scores_for_result(res, classes, is_ovr=is_ovr)
        if frame_scores is None or frame_scores.shape[0] < 2:
            continue
        total_frames = int(res.get("total_frames") or frame_scores.shape[0])
        frame_interval = max(1, int(res.get("frame_interval") or 1))
        top_idx = np.argmax(frame_scores, axis=1)
        segments = list(res.get("aggregated_segments") or [])
        if len(segments) < 2:
            # No usable precomputed segments: derive them from runs of the
            # per-frame argmax (each run becomes one segment).
            run_start = 0
            for fi in range(1, frame_scores.shape[0] + 1):
                at_end = fi >= frame_scores.shape[0]
                if at_end or int(top_idx[fi]) != int(top_idx[run_start]):
                    cls_idx = int(top_idx[run_start])
                    if 0 <= cls_idx < len(classes):
                        seg_end = fi - 1
                        segments.append({
                            "class": cls_idx,
                            "start": int(run_start),
                            "end": int(seg_end),
                            "confidence": float(np.mean(frame_scores[run_start:seg_end + 1, cls_idx])),
                        })
                    run_start = fi
        if len(segments) < 2:
            # A single segment has no transitions to mine.
            continue
        # Per-frame margin between best and second-best class.
        sorted_scores = np.sort(frame_scores, axis=1)
        if sorted_scores.shape[1] >= 2:
            margins = sorted_scores[:, -1] - sorted_scores[:, -2]
        else:
            margins = sorted_scores[:, -1]
        # Span of one review clip in original video frames, and half of it
        # (used to centre the clip on the boundary).
        clip_span_frames = max(frame_interval, int(clip_length) * frame_interval)
        half_span = max(frame_interval, (int(clip_length) // 2) * frame_interval)
        max_start = max(0, total_frames - clip_span_frames)
        seen_keys = set()

        # Walk adjacent segment pairs; each class change is a candidate.
        for left_seg, right_seg in zip(segments[:-1], segments[1:]):
            left_cls = int(left_seg.get("class", -1))
            right_cls = int(right_seg.get("class", -1))
            if left_cls == right_cls or left_cls < 0 or right_cls < 0:
                continue
            if left_cls >= len(classes) or right_cls >= len(classes):
                continue
            boundary_frame = int(right_seg.get("start", left_seg.get("end", 0) + 1))
            if boundary_frame <= 0 or boundary_frame >= frame_scores.shape[0]:
                continue
            # Centre the clip on the boundary, clamped into valid range.
            desired_start = boundary_frame - half_span
            start_frame = max(0, min(max_start, desired_start))
            sampled_frames = [
                min(frame_scores.shape[0] - 1, start_frame + t * frame_interval)
                for t in range(int(clip_length))
            ]
            if not sampled_frames:
                continue
            # Per-frame label proposal from the argmax timeline.
            proposed_frame_labels = [classes[int(top_idx[f])] for f in sampled_frames]
            # Mean margin in a one-interval window around the boundary —
            # low margin there means the transition is genuinely ambiguous.
            center_lo = max(0, boundary_frame - frame_interval)
            center_hi = min(frame_scores.shape[0], boundary_frame + frame_interval + 1)
            center_margin = float(np.mean(margins[center_lo:center_hi])) if center_hi > center_lo else float(margins[boundary_frame])
            left_conf = float(left_seg.get("confidence", frame_scores[max(0, boundary_frame - 1), left_cls]))
            right_conf = float(right_seg.get("confidence", frame_scores[min(frame_scores.shape[0] - 1, boundary_frame), right_cls]))
            center_scores = frame_scores[sampled_frames[len(sampled_frames) // 2]]
            # Confident flanks + ambiguous centre => high transition score,
            # clamped into [0, 1].
            transition_score = max(0.0, min(1.0, 0.5 * (left_conf + right_conf) + (1.0 - max(0.0, center_margin))))
            key = (video_path, start_frame, left_cls, right_cls)
            if key in seen_keys:
                continue
            seen_keys.add(key)
            entry = {
                "review_kind": "transition",
                # clip_idx carries the boundary frame for transition entries.
                "video": video_path,
                "clip_idx": int(boundary_frame),
                "start_frame": int(start_frame),
                "frame_interval": int(frame_interval),
                "transition_frame": int(boundary_frame),
                "left_class": classes[left_cls],
                "right_class": classes[right_cls],
                "predicted_class": f"{classes[left_cls]} -> {classes[right_cls]}",
                "scores": {
                    classes[i]: float(center_scores[i])
                    for i in range(min(len(classes), center_scores.shape[0]))
                },
                "top_score": float(np.max(center_scores)),
                "transition_score": float(transition_score),
                "proposed_frame_labels": proposed_frame_labels,
            }
            # Credit the transition to both classes it involves.
            ranked[classes[left_cls]].append(entry)
            ranked[classes[right_cls]].append(entry)

    # Per class: sort by transition score, drop duplicates, cap at N.
    deduped = {}
    for class_name, entries in ranked.items():
        seen = set()
        unique = []
        for entry in sorted(entries, key=lambda e: float(e.get("transition_score", 0.0)), reverse=True):
            key = (entry.get("video", ""), int(entry.get("start_frame", -1)), entry.get("left_class", ""), entry.get("right_class", ""))
            if key in seen:
                continue
            seen.add(key)
            unique.append(entry)
        deduped[class_name] = unique[:n_per_class]
    return deduped
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def rank_transition_clips_per_video_for_review(
    results: dict,
    classes: list,
    clip_length: int,
    is_ovr: bool = False,
    n_per_class: int = 50,
) -> dict:
    """Mine transition candidates per class within each video separately.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, result in results.items():
        single = rank_transition_clips_for_review(
            {video_path: result},
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=n_per_class,
        )
        for name in classes:
            per_class[name][video_path] = list(single.get(name, []) or [])
    return per_class
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def rank_confident_clips_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 100,
    is_ovr: bool = False,
    clip_length: int = 8,
    min_gap_multiplier: float = 0.75,
) -> dict:
    """
    Rank top confident clips with lightweight diversity across videos/time.

    The selection is confidence-first, but uses round-robin across videos and
    avoids choosing clips that are too close in time within the same video.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered class names.
        n_per_class: Maximum clips returned per class.
        is_ovr: Forwarded to entry construction.
        clip_length: Clip length in sampled frames; sets the temporal gap.
        min_gap_multiplier: Fraction of clip_length used as the minimum gap
            between two selected clips from the same video.

    Returns:
        Dict {class_name: [entry, ...]} ordered by the diverse selection.
    """
    all_entries = _build_clip_entries(results, classes, is_ovr=is_ovr)
    if not all_entries or n_per_class <= 0:
        return {class_name: [] for class_name in classes}

    for entry in all_entries:
        entry["review_kind"] = "confident"
        entry["confidence_score"] = float(entry.get("top_score", 0.0))

    # Minimum distance between selections in sampled-frame units; scaled by
    # each candidate's frame_interval inside is_far_enough.
    min_gap_frames = max(
        1,
        int(round(max(1, int(clip_length)) * max(0.1, float(min_gap_multiplier))))
    )

    def is_far_enough(candidate: dict, selected: list) -> bool:
        # True when the candidate is at least the minimum gap away from every
        # already-selected clip of the same video.
        cand_video = candidate.get("video", "")
        cand_start = int(candidate.get("start_frame", 0))
        cand_interval = max(1, int(candidate.get("frame_interval", 1)))
        gap_frames = int(round(min_gap_frames * cand_interval))
        for prev in selected:
            if prev.get("video", "") != cand_video:
                continue
            if abs(cand_start - int(prev.get("start_frame", 0))) < gap_frames:
                return False
        return True

    def select_diverse_entries(entries: list, limit: int) -> list:
        # Confidence-first ordering with deterministic tie-breaks
        # (video basename, then start frame).
        entries = sorted(
            entries,
            key=lambda e: (
                -float(e.get("confidence_score", 0.0)),
                os.path.basename(e.get("video", "")),
                int(e.get("start_frame", 0)),
            )
        )

        # Group candidates by video to enable round-robin picking.
        by_video = {}
        for entry in entries:
            by_video.setdefault(entry.get("video", ""), []).append(entry)

        # Videos ordered by the confidence of their best candidate.
        ordered_videos = sorted(
            by_video.keys(),
            key=lambda video: (
                -float(by_video[video][0].get("confidence_score", 0.0)),
                os.path.basename(video),
            )
        )

        selected = []
        selected_keys = set()
        # Per-video cursor into its (sorted) candidate list.
        positions = {video: 0 for video in ordered_videos}

        # Round-robin: take at most one acceptable clip per video per pass,
        # until the limit is reached or no video can contribute.
        made_progress = True
        while len(selected) < limit and made_progress:
            made_progress = False
            for video in ordered_videos:
                candidates = by_video[video]
                pos = positions[video]
                while pos < len(candidates):
                    entry = candidates[pos]
                    pos += 1
                    key = (entry.get("video", ""), int(entry.get("clip_idx", -1)))
                    if key in selected_keys:
                        continue
                    if not is_far_enough(entry, selected):
                        continue
                    selected.append(entry)
                    selected_keys.add(key)
                    positions[video] = pos
                    made_progress = True
                    break
                else:
                    # Video exhausted this pass; remember the cursor.
                    positions[video] = pos

                if len(selected) >= limit:
                    break

        # Backfill ignoring the gap constraint if diversity alone cannot
        # reach the limit.
        if len(selected) < limit:
            for entry in entries:
                key = (entry.get("video", ""), int(entry.get("clip_idx", -1)))
                if key in selected_keys:
                    continue
                selected.append(entry)
                selected_keys.add(key)
                if len(selected) >= limit:
                    break

        return selected[:limit]

    ranked = {}
    for class_name in classes:
        # Re-score every clip by this class's own probability (not the
        # argmax winner), so each class ranks all clips independently.
        class_entries = []
        for entry in all_entries:
            class_score = float(entry.get("scores", {}).get(class_name, 0.0))
            class_entries.append({
                **entry,
                "class_score": class_score,
                "confidence_score": class_score,
                "predicted_class": class_name,
            })
        ranked[class_name] = select_diverse_entries(class_entries, n_per_class)

    return ranked
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def rank_confident_clips_per_video_for_review(
    results: dict,
    classes: list,
    n_per_class: int = 100,
    is_ovr: bool = False,
    clip_length: int = 8,
    min_gap_multiplier: float = 0.75,
) -> dict:
    """Rank top confident clips per class within each video.

    Applies rank_confident_clips_for_review to each video in isolation.

    Returns:
        Dict {class_name: {video_path: [entry, ...]}}.
    """
    per_class = {name: {} for name in classes}
    for video_path, result in results.items():
        single = rank_confident_clips_for_review(
            {video_path: result},
            classes,
            n_per_class=n_per_class,
            is_ovr=is_ovr,
            clip_length=clip_length,
            min_gap_multiplier=min_gap_multiplier,
        )
        for name in classes:
            per_class[name][video_path] = list(single.get(name, []) or [])
    return per_class
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def save_uncertainty_report(
    results: dict,
    classes: list,
    output_path: str,
    is_ovr: bool = False,
    n_per_class: int = 25,
    clip_length: int = 8,
    target_fps: int = 16,
) -> dict:
    """Compute every review ranking and persist the report as JSON.

    Runs the uncertainty, transition, and confident rankings (both global
    and per-video variants), bundles them with the run parameters, writes
    the result to output_path, and returns the report dict.

    Args:
        results: Inference result dict (video_path -> result_entry).
        classes: Ordered class names.
        output_path: Destination JSON file; parent dirs are created.
        is_ovr: Whether the model uses one-vs-rest sigmoids.
        n_per_class: Base per-class limit for the uncertainty ranking.
        clip_length: Clip length in sampled frames.
        target_fps: Recorded in the report for downstream consumers.
    """
    # Transition/confident mining use larger floors than the base limit.
    transition_n = max(50, n_per_class)
    confident_n = max(100, n_per_class)

    report = {
        "classes": classes,
        "is_ovr": is_ovr,
        "n_per_class": n_per_class,
        "clip_length": clip_length,
        "target_fps": target_fps,
        "per_class": rank_clips_for_review(
            results, classes, n_per_class=n_per_class, is_ovr=is_ovr
        ),
        "per_class_per_video": rank_clips_per_video_for_review(
            results, classes, n_per_class=n_per_class, is_ovr=is_ovr
        ),
        "transition_per_class": rank_transition_clips_for_review(
            results,
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=transition_n,
        ),
        "transition_per_class_per_video": rank_transition_clips_per_video_for_review(
            results,
            classes,
            clip_length=clip_length,
            is_ovr=is_ovr,
            n_per_class=transition_n,
        ),
        "confident_per_class": rank_confident_clips_for_review(
            results,
            classes,
            n_per_class=confident_n,
            is_ovr=is_ovr,
            clip_length=clip_length,
        ),
        "confident_per_class_per_video": rank_confident_clips_per_video_for_review(
            results,
            classes,
            n_per_class=confident_n,
            is_ovr=is_ovr,
            clip_length=clip_length,
        ),
    }
    # Make sure the destination directory exists before writing.
    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    return report
|