singlebehaviorlab-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sam2/__init__.py +11 -0
- sam2/automatic_mask_generator.py +454 -0
- sam2/benchmark.py +92 -0
- sam2/build_sam.py +174 -0
- sam2/configs/sam2/sam2_hiera_b+.yaml +113 -0
- sam2/configs/sam2/sam2_hiera_l.yaml +117 -0
- sam2/configs/sam2/sam2_hiera_s.yaml +116 -0
- sam2/configs/sam2/sam2_hiera_t.yaml +118 -0
- sam2/configs/sam2.1/sam2.1_hiera_b+.yaml +116 -0
- sam2/configs/sam2.1/sam2.1_hiera_l.yaml +120 -0
- sam2/configs/sam2.1/sam2.1_hiera_s.yaml +119 -0
- sam2/configs/sam2.1/sam2.1_hiera_t.yaml +121 -0
- sam2/configs/sam2.1_training/sam2.1_hiera_b+_MOSE_finetune.yaml +339 -0
- sam2/modeling/__init__.py +5 -0
- sam2/modeling/backbones/__init__.py +5 -0
- sam2/modeling/backbones/hieradet.py +317 -0
- sam2/modeling/backbones/image_encoder.py +134 -0
- sam2/modeling/backbones/utils.py +93 -0
- sam2/modeling/memory_attention.py +169 -0
- sam2/modeling/memory_encoder.py +181 -0
- sam2/modeling/position_encoding.py +239 -0
- sam2/modeling/sam/__init__.py +5 -0
- sam2/modeling/sam/mask_decoder.py +295 -0
- sam2/modeling/sam/prompt_encoder.py +202 -0
- sam2/modeling/sam/transformer.py +311 -0
- sam2/modeling/sam2_base.py +913 -0
- sam2/modeling/sam2_utils.py +323 -0
- sam2/sam2_hiera_b+.yaml +113 -0
- sam2/sam2_hiera_l.yaml +117 -0
- sam2/sam2_hiera_s.yaml +116 -0
- sam2/sam2_hiera_t.yaml +118 -0
- sam2/sam2_image_predictor.py +466 -0
- sam2/sam2_video_predictor.py +1388 -0
- sam2/sam2_video_predictor_legacy.py +1172 -0
- sam2/utils/__init__.py +5 -0
- sam2/utils/amg.py +348 -0
- sam2/utils/misc.py +349 -0
- sam2/utils/transforms.py +118 -0
- singlebehaviorlab/__init__.py +4 -0
- singlebehaviorlab/__main__.py +130 -0
- singlebehaviorlab/_paths.py +100 -0
- singlebehaviorlab/backend/__init__.py +2 -0
- singlebehaviorlab/backend/augmentations.py +320 -0
- singlebehaviorlab/backend/data_store.py +420 -0
- singlebehaviorlab/backend/model.py +1290 -0
- singlebehaviorlab/backend/train.py +4667 -0
- singlebehaviorlab/backend/uncertainty.py +578 -0
- singlebehaviorlab/backend/video_processor.py +688 -0
- singlebehaviorlab/backend/video_utils.py +139 -0
- singlebehaviorlab/data/config/config.yaml +85 -0
- singlebehaviorlab/data/training_profiles.json +334 -0
- singlebehaviorlab/gui/__init__.py +4 -0
- singlebehaviorlab/gui/analysis_widget.py +2291 -0
- singlebehaviorlab/gui/attention_export.py +311 -0
- singlebehaviorlab/gui/clip_extraction_widget.py +481 -0
- singlebehaviorlab/gui/clustering_widget.py +3187 -0
- singlebehaviorlab/gui/inference_popups.py +1138 -0
- singlebehaviorlab/gui/inference_widget.py +4550 -0
- singlebehaviorlab/gui/inference_worker.py +651 -0
- singlebehaviorlab/gui/labeling_widget.py +2324 -0
- singlebehaviorlab/gui/main_window.py +754 -0
- singlebehaviorlab/gui/metadata_management_widget.py +1119 -0
- singlebehaviorlab/gui/motion_tracking.py +764 -0
- singlebehaviorlab/gui/overlay_export.py +1234 -0
- singlebehaviorlab/gui/plot_integration.py +729 -0
- singlebehaviorlab/gui/qt_helpers.py +29 -0
- singlebehaviorlab/gui/registration_widget.py +1485 -0
- singlebehaviorlab/gui/review_widget.py +1330 -0
- singlebehaviorlab/gui/segmentation_tracking_widget.py +2752 -0
- singlebehaviorlab/gui/tab_tutorial_dialog.py +312 -0
- singlebehaviorlab/gui/timeline_themes.py +131 -0
- singlebehaviorlab/gui/training_profiles.py +418 -0
- singlebehaviorlab/gui/training_widget.py +3719 -0
- singlebehaviorlab/gui/video_utils.py +233 -0
- singlebehaviorlab/licenses/SAM2-LICENSE +201 -0
- singlebehaviorlab/licenses/VideoPrism-LICENSE +202 -0
- singlebehaviorlab-2.0.0.dist-info/METADATA +447 -0
- singlebehaviorlab-2.0.0.dist-info/RECORD +88 -0
- singlebehaviorlab-2.0.0.dist-info/WHEEL +5 -0
- singlebehaviorlab-2.0.0.dist-info/entry_points.txt +2 -0
- singlebehaviorlab-2.0.0.dist-info/licenses/LICENSE +21 -0
- singlebehaviorlab-2.0.0.dist-info/top_level.txt +3 -0
- videoprism/__init__.py +0 -0
- videoprism/encoders.py +910 -0
- videoprism/layers.py +1136 -0
- videoprism/models.py +407 -0
- videoprism/tokenizers.py +167 -0
- videoprism/utils.py +168 -0
singlebehaviorlab/backend/video_utils.py

import logging
import os
from typing import Callable, Optional

import cv2

logger = logging.getLogger(__name__)


def extract_clips(
    video_path: str,
    output_dir: str,
    target_fps: int = 16,
    clip_length_frames: int = 16,
    step_frames: int = 16,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    stop_callback: Optional[Callable[[], bool]] = None,
) -> tuple[int, str]:
    """Subsample a video to target_fps and cut it into fixed-length clips.

    With step_frames == clip_length_frames the clips are back to back;
    step_frames < clip_length_frames makes them overlap, and
    step_frames > clip_length_frames leaves gaps between them.

    Returns:
        (number of clips written, output_dir)
    """
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    try:
        orig_fps = cap.get(cv2.CAP_PROP_FPS)
        if orig_fps <= 0:
            orig_fps = 30.0  # some containers report 0; fall back to a sane default

        # Keep every Nth frame so the effective rate approximates target_fps.
        frame_interval = max(1, int(round(orig_fps / target_fps)))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        total_frames_after_subsampling = total_frames // frame_interval

        # Clip k starts at subsampled frame k * step_frames and needs
        # clip_length_frames frames, so one formula covers overlapping,
        # back-to-back, and gapped stepping alike.
        if total_frames_after_subsampling >= clip_length_frames:
            total_clips = (total_frames_after_subsampling - clip_length_frames) // step_frames + 1
        else:
            total_clips = 0

        frame_idx = 0
        clip_idx = 0
        frames_buffer = []
        skip_remaining = 0  # subsampled frames to drop when step > clip length

        while True:
            if stop_callback and stop_callback():
                break
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                if skip_remaining > 0:
                    skip_remaining -= 1
                else:
                    frames_buffer.append(frame)

                if len(frames_buffer) == clip_length_frames:
                    clip_path = os.path.join(output_dir, f"clip_{clip_idx:06d}.mp4")
                    save_clip(frames_buffer, clip_path, target_fps)
                    clip_idx += 1

                    if progress_callback:
                        progress_callback(clip_idx, total_clips)

                    if step_frames < clip_length_frames:
                        # Keep the buffer tail so consecutive clips overlap.
                        frames_buffer = frames_buffer[clip_length_frames - step_frames:]
                    else:
                        frames_buffer = []
                        skip_remaining = max(0, step_frames - clip_length_frames)

            frame_idx += 1

        return clip_idx, output_dir
    finally:
        cap.release()


def save_clip(frames: list, output_path: str, fps: float):
    """Save a list of frames as a standard MP4 clip (mp4v codec)."""
    if not frames:
        return

    h, w, c = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

    for frame in frames:
        out.write(frame)

    out.release()


def load_clip_frames(clip_path: str, target_size: Optional[tuple[int, int]] = None) -> list:
    """Load frames from a video clip.

    Args:
        clip_path: Path to video clip
        target_size: Optional (width, height) to resize frames

    Returns:
        List of frames as numpy arrays (BGR format)
    """
    cap = cv2.VideoCapture(clip_path)
    frames = []

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if target_size:
                frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)

            frames.append(frame)

        return frames
    finally:
        cap.release()


def get_video_info(video_path: str) -> dict:
    """Get video metadata (fps, frame count, width, height)."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return {}

    try:
        return {
            'fps': cap.get(cv2.CAP_PROP_FPS),
            'frame_count': int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
            'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        }
    finally:
        cap.release()
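For orientation, a minimal sketch of driving extract_clips from the module above, assuming the wheel is installed. Only the import path and the function signatures come from the package; the file names, the threading.Event, and the callback bodies are hypothetical.

    import threading

    from singlebehaviorlab.backend.video_utils import extract_clips, get_video_info

    stop_event = threading.Event()  # another thread can call stop_event.set() to abort

    def on_progress(done: int, total: int) -> None:
        # total is a pre-computed estimate; it can be 0 for very short videos
        print(f"clips written: {done}/{total or '?'}")

    print(get_video_info("session_01.mp4"))  # hypothetical input file

    n_clips, out_dir = extract_clips(
        "session_01.mp4",
        "clips/session_01",               # hypothetical output directory
        target_fps=12,                    # mirrors default_target_fps in config.yaml below
        clip_length_frames=8,             # mirrors default_clip_length
        step_frames=8,                    # == clip length, so clips are back to back
        progress_callback=on_progress,
        stop_callback=stop_event.is_set,  # return True to abort mid-video
    )
    print(f"wrote {n_clips} clips to {out_dir}")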
singlebehaviorlab/data/config/config.yaml

# SingleBehaviorLab Configuration
# Paths below are resolved relative to the app directory on first launch.
# They will be updated automatically when you create or load an experiment.

# --- Paths (leave blank; auto-resolved at startup) ---
data_dir:
raw_videos_dir:
clips_dir:
annotations_dir:
models_dir:
backbone_dir:
annotation_file:
experiments_dir:
experiment_name:
experiment_path:
config_path:

# --- Video extraction defaults ---
default_target_fps: 12
default_clip_length: 8
default_step_frames: 8

# --- Training defaults ---
default_batch_size: 16
default_epochs: 60
default_learning_rate: 0.0005
default_weight_decay: 0.001

# --- Model ---
videoprism_model_name: videoprism_public_v1_base
backbone_model: videoprism_public_v1_base
resolution: 288
model_input_size:
  - 288
  - 288

# --- Training options ---
default_use_scheduler: true
default_use_ema: true
default_map_num_queries: 1
default_use_class_map: false
default_use_cosine: false
default_use_label_smoothing: false
default_use_supcon_loss: true
default_supcon_temp: 0.07
default_supcon_weight: 0.5
default_use_focal_loss: true
default_focal_gamma: 2.0

# --- Last training session (auto-updated by app) ---
last_training:
  parameters:
    batch_size: 16
    epochs: 60
    lr: 0.0005
    use_scheduler: true
    weight_decay: 0.001
    clip_length: 8
    val_split: 0.2
    use_class_weights: false
    use_label_smoothing: false
    use_supcon_loss: true
    supcon_temp: 0.07
    supcon_weight: 0.5
    use_triplet_loss: false
    triplet_margin: 0.2
    triplet_weight: 0.5
    use_focal_loss: true
    focal_gamma: 2.0
    use_weighted_sampler: true
    use_augmentation: true
    limit_classes: true
    limit_per_class: false
    selected_classes: []
    per_class_limits: {}
    head:
      dropout: 0.25
      map_head_kwargs:
        num_heads: 4
        mlp_ratio: 4.0
        num_queries: 1
      use_class_map: false
      use_cosine: false
    pretrained_path:
  output_path:
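Blank keys such as data_dir: are valid YAML and load as null until the app fills them in. A short sketch of reading the file with PyYAML; yaml.safe_load is the standard PyYAML call, but how the app itself consumes this config is internal, and the nested access below assumes the indentation reconstructed above.

    import yaml

    with open("singlebehaviorlab/data/config/config.yaml") as f:
        cfg = yaml.safe_load(f)

    print(cfg["default_target_fps"])  # 12
    print(cfg["model_input_size"])    # [288, 288]
    print(cfg["data_dir"])            # None -- blank keys parse as null
    print(cfg["last_training"]["parameters"]["lr"])  # 0.0005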
singlebehaviorlab/data/training_profiles.json

{
  "LowInputData": {
    "batch_size": 8,
    "epochs": 60,
    "lr": 5e-05,
    "classification_lr": 5e-05,
    "use_scheduler": true,
    "use_ema": true,
    "weight_decay": 0.01,
    "head_kwargs": {
      "num_heads": 1
    },
    "dropout": 0.5,
    "clip_length": 8,
    "use_all_for_training": true,
    "val_split": 0.15,
    "auto_tune_before_final": false,
    "auto_tune_runs": 8,
    "auto_tune_epochs": 12,
    "use_class_weights": false,
    "use_focal_loss": false,
    "focal_gamma": 2.0,
    "use_supcon_loss": false,
    "supcon_weight": 0.8,
    "supcon_temperature": 0.1,
    "use_frame_loss": true,
    "use_temporal_decoder": true,
    "frame_head_temporal_layers": 1,
    "temporal_pool_frames": 1,
    "proj_dim": 64,
    "use_frame_bout_balance": true,
    "frame_bout_balance_power": 0.5,
    "boundary_loss_weight": 0.3,
    "boundary_tolerance": 2,
    "smoothness_loss_weight": 0.05,
    "use_ovr": true,
    "ovr_background_as_negative": false,
    "ovr_background_class_names": [],
    "ovr_label_smoothing": 0.05,
    "use_asl": false,
    "asl_gamma_neg": 3.0,
    "asl_gamma_pos": 0.0,
    "asl_clip": 0.05,
    "use_hard_pair_mining": false,
    "hard_pairs": [],
    "hard_pair_loss_weight": 0.2,
    "hard_pair_margin": 0.5,
    "hard_pair_confusion_boost": 1.5,
    "use_confusion_sampler": true,
    "confusion_sampler_temperature": 2.0,
    "confusion_sampler_warmup_pct": 0.2,
    "use_weighted_sampler": true,
    "use_augmentation": true,
    "virtual_expansion": 15,
    "stitch_augmentation_prob": 0.0,
    "emb_aug_versions": 5,
    "multi_scale": false,
    "augmentation_options": {
      "use_horizontal_flip": true,
      "use_vertical_flip": true,
      "use_color_jitter": true,
      "use_gaussian_blur": true,
      "use_random_noise": true,
      "use_small_rotation": true,
      "use_speed_perturb": false,
      "use_random_shapes": true,
      "use_grayscale": false,
      "use_lighting_robustness": true
    },
    "limit_classes": false,
    "selected_classes": [],
    "limit_per_class": false,
    "per_class_limits": {},
    "per_class_val_limits": {},
    "use_embedding_diversity": false,
    "backbone_model": "videoprism_public_v1_base",
    "class_names": [
      "walk",
      "nowalk"
    ],
    "pretrained_path": null,
    "f1_exclude_classes": [],
    "ovr_pos_weight_f1_excluded": 1.5
  },
  "MoreInputData": {
    "batch_size": 24,
    "epochs": 60,
    "lr": 5e-05,
    "classification_lr": 5e-05,
    "use_scheduler": true,
    "use_ema": true,
    "weight_decay": 0.01,
    "head_kwargs": {
      "num_heads": 8
    },
    "dropout": 0.6,
    "clip_length": 8,
    "use_all_for_training": false,
    "val_split": 0.15,
    "auto_tune_before_final": false,
    "auto_tune_runs": 8,
    "auto_tune_epochs": 12,
    "use_class_weights": false,
    "use_focal_loss": false,
    "focal_gamma": 2.0,
    "use_supcon_loss": true,
    "supcon_weight": 0.8,
    "supcon_temperature": 0.1,
    "use_frame_loss": true,
    "use_temporal_decoder": true,
    "frame_head_temporal_layers": 6,
    "temporal_pool_frames": 1,
    "proj_dim": 320,
    "use_frame_bout_balance": true,
    "frame_bout_balance_power": 0.5,
    "boundary_loss_weight": 0.3,
    "boundary_tolerance": 2,
    "smoothness_loss_weight": 0.05,
    "use_ovr": true,
    "ovr_background_as_negative": false,
    "ovr_background_class_names": [],
    "ovr_label_smoothing": 0.05,
    "use_asl": false,
    "asl_gamma_neg": 3.0,
    "asl_gamma_pos": 0.0,
    "asl_clip": 0.05,
    "use_hard_pair_mining": false,
    "hard_pairs": [],
    "hard_pair_loss_weight": 0.2,
    "hard_pair_margin": 0.5,
    "hard_pair_confusion_boost": 1.5,
    "use_confusion_sampler": true,
    "confusion_sampler_temperature": 2.0,
    "confusion_sampler_warmup_pct": 0.2,
    "use_weighted_sampler": true,
    "use_augmentation": true,
    "virtual_expansion": 10,
    "stitch_augmentation_prob": 0.0,
    "emb_aug_versions": 5,
    "multi_scale": false,
    "augmentation_options": {
      "use_horizontal_flip": true,
      "use_vertical_flip": true,
      "use_color_jitter": true,
      "use_gaussian_blur": true,
      "use_random_noise": true,
      "use_small_rotation": true,
      "use_speed_perturb": false,
      "use_random_shapes": true,
      "use_grayscale": false,
      "use_lighting_robustness": true
    },
    "limit_classes": false,
    "selected_classes": [],
    "limit_per_class": false,
    "per_class_limits": {},
    "per_class_val_limits": {},
    "use_embedding_diversity": false,
    "backbone_model": "videoprism_public_v1_base",
    "class_names": [
      "walk",
      "nowalk"
    ],
    "pretrained_path": null,
    "f1_exclude_classes": [],
    "ovr_pos_weight_f1_excluded": 1.5
  },
  "LocalizationLowData": {
    "batch_size": 8,
    "epochs": 60,
    "lr": 5e-05,
    "classification_lr": 5e-05,
    "use_scheduler": true,
    "use_ema": true,
    "weight_decay": 0.01,
    "head_kwargs": {
      "num_heads": 1
    },
    "dropout": 0.5,
    "clip_length": 8,
    "use_all_for_training": true,
    "val_split": 0.15,
    "auto_tune_before_final": false,
    "auto_tune_runs": 8,
    "auto_tune_epochs": 12,
    "use_class_weights": false,
    "use_focal_loss": false,
    "focal_gamma": 2.0,
    "use_supcon_loss": false,
    "supcon_weight": 0.8,
    "supcon_temperature": 0.1,
    "use_frame_loss": true,
    "use_temporal_decoder": true,
    "frame_head_temporal_layers": 1,
    "temporal_pool_frames": 1,
    "proj_dim": 64,
    "use_frame_bout_balance": true,
    "frame_bout_balance_power": 0.5,
    "boundary_loss_weight": 0.3,
    "boundary_tolerance": 2,
    "smoothness_loss_weight": 0.05,
    "use_ovr": true,
    "ovr_background_as_negative": false,
    "ovr_background_class_names": [],
    "ovr_label_smoothing": 0.05,
    "use_asl": false,
    "asl_gamma_neg": 3.0,
    "asl_gamma_pos": 0.0,
    "asl_clip": 0.05,
    "use_hard_pair_mining": false,
    "hard_pairs": [],
    "hard_pair_loss_weight": 0.2,
    "hard_pair_margin": 0.5,
    "hard_pair_confusion_boost": 1.5,
    "use_confusion_sampler": true,
    "confusion_sampler_temperature": 2.0,
    "confusion_sampler_warmup_pct": 0.2,
    "use_weighted_sampler": true,
    "use_augmentation": true,
    "virtual_expansion": 15,
    "stitch_augmentation_prob": 0.0,
    "emb_aug_versions": 5,
    "multi_scale": false,
    "augmentation_options": {
      "use_horizontal_flip": true,
      "use_vertical_flip": true,
      "use_color_jitter": true,
      "use_gaussian_blur": true,
      "use_random_noise": true,
      "use_small_rotation": true,
      "use_speed_perturb": false,
      "use_random_shapes": true,
      "use_grayscale": false,
      "use_lighting_robustness": true
    },
    "limit_classes": false,
    "selected_classes": [],
    "limit_per_class": false,
    "per_class_limits": {},
    "per_class_val_limits": {},
    "use_embedding_diversity": false,
    "backbone_model": "videoprism_public_v1_base",
    "class_names": [
      "walk",
      "nowalk"
    ],
    "pretrained_path": null,
    "f1_exclude_classes": [],
    "ovr_pos_weight_f1_excluded": 1.5
  },
  "LocalizationMoreData": {
    "batch_size": 24,
    "epochs": 60,
    "lr": 5e-05,
    "classification_lr": 5e-05,
    "use_scheduler": true,
    "use_ema": true,
    "weight_decay": 0.01,
    "head_kwargs": {
      "num_heads": 8
    },
    "dropout": 0.6,
    "clip_length": 8,
    "use_all_for_training": false,
    "val_split": 0.15,
    "auto_tune_before_final": false,
    "auto_tune_runs": 8,
    "auto_tune_epochs": 12,
    "use_class_weights": false,
    "use_focal_loss": false,
    "focal_gamma": 2.0,
    "use_supcon_loss": true,
    "supcon_weight": 0.8,
    "supcon_temperature": 0.1,
    "use_frame_loss": true,
    "use_temporal_decoder": true,
    "frame_head_temporal_layers": 6,
    "temporal_pool_frames": 1,
    "proj_dim": 320,
    "use_frame_bout_balance": true,
    "frame_bout_balance_power": 0.5,
    "boundary_loss_weight": 0.3,
    "boundary_tolerance": 2,
    "smoothness_loss_weight": 0.05,
    "use_ovr": true,
    "ovr_background_as_negative": false,
    "ovr_background_class_names": [],
    "ovr_label_smoothing": 0.05,
    "use_asl": false,
    "asl_gamma_neg": 3.0,
    "asl_gamma_pos": 0.0,
    "asl_clip": 0.05,
    "use_hard_pair_mining": false,
    "hard_pairs": [],
    "hard_pair_loss_weight": 0.2,
    "hard_pair_margin": 0.5,
    "hard_pair_confusion_boost": 1.5,
    "use_confusion_sampler": true,
    "confusion_sampler_temperature": 2.0,
    "confusion_sampler_warmup_pct": 0.2,
    "use_weighted_sampler": true,
    "use_augmentation": true,
    "virtual_expansion": 10,
    "stitch_augmentation_prob": 0.0,
    "emb_aug_versions": 5,
    "multi_scale": false,
    "augmentation_options": {
      "use_horizontal_flip": true,
      "use_vertical_flip": true,
      "use_color_jitter": true,
      "use_gaussian_blur": true,
      "use_random_noise": true,
      "use_small_rotation": true,
      "use_speed_perturb": false,
      "use_random_shapes": true,
      "use_grayscale": false,
      "use_lighting_robustness": true
    },
    "limit_classes": false,
    "selected_classes": [],
    "limit_per_class": false,
    "per_class_limits": {},
    "per_class_val_limits": {},
    "use_embedding_diversity": false,
    "backbone_model": "videoprism_public_v1_base",
    "class_names": [
      "walk",
      "nowalk"
    ],
    "pretrained_path": null,
    "f1_exclude_classes": [],
    "ovr_pos_weight_f1_excluded": 1.5
  }
}
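Judging by their names, the four profiles pair a data regime (low vs. more input data) with a task emphasis; in this file the Localization variants are identical to their counterparts, and the Low/More pairs differ in batch size, dropout, projection width, temporal-decoder depth, virtual expansion, and SupCon use. A sketch of inspecting them with the standard library; json.load is stdlib, but the selection logic is illustrative, not how singlebehaviorlab itself consumes the file.

    import json

    with open("singlebehaviorlab/data/training_profiles.json") as f:
        profiles = json.load(f)

    # Capacity/regularization knobs that actually differ across profiles:
    for name, p in profiles.items():
        print(name, p["batch_size"], p["dropout"], p["proj_dim"],
              p["frame_head_temporal_layers"], p["use_supcon_loss"])
    # LowInputData           8  0.5   64  1  False
    # MoreInputData         24  0.6  320  6  True
    # LocalizationLowData    8  0.5   64  1  False
    # LocalizationMoreData  24  0.6  320  6  True

    params = profiles["LowInputData"]  # hypothetical choice
    print(params["lr"], params["virtual_expansion"])  # 5e-05 15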