singlebehaviorlab 2.3.1.tar.gz → 2.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {singlebehaviorlab-2.3.1/singlebehaviorlab.egg-info → singlebehaviorlab-2.3.2}/PKG-INFO +1 -1
  2. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/pyproject.toml +1 -1
  3. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/__init__.py +2 -1
  4. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/clustering.py +25 -3
  5. singlebehaviorlab-2.3.2/singlebehaviorlab/backend/contrastive.py +202 -0
  6. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/registration.py +9 -1
  7. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/cli.py +6 -0
  8. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/clustering_widget.py +115 -21
  9. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/registration_widget.py +49 -50
  10. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2/singlebehaviorlab.egg-info}/PKG-INFO +1 -1
  11. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab.egg-info/SOURCES.txt +1 -0
  12. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/LICENSE +0 -0
  13. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/README.md +0 -0
  14. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/setup.cfg +0 -0
  15. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/__main__.py +0 -0
  16. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/_paths.py +0 -0
  17. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/__init__.py +0 -0
  18. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/augmentations.py +0 -0
  19. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/data_store.py +0 -0
  20. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/inference.py +0 -0
  21. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/model.py +0 -0
  22. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/segmentation.py +0 -0
  23. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/segments.py +0 -0
  24. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/train.py +0 -0
  25. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/training_runner.py +0 -0
  26. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/uncertainty.py +0 -0
  27. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/video_processor.py +0 -0
  28. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/backend/video_utils.py +0 -0
  29. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/config.py +0 -0
  30. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/data/config/config.yaml +0 -0
  31. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/data/training_profiles.json +0 -0
  32. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/demo.py +0 -0
  33. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/__init__.py +0 -0
  34. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/analysis_widget.py +0 -0
  35. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/attention_export.py +0 -0
  36. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/clip_extraction_widget.py +0 -0
  37. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/inference_popups.py +0 -0
  38. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/inference_widget.py +0 -0
  39. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/inference_worker.py +0 -0
  40. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/interactive_timeline.py +0 -0
  41. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/labeling_widget.py +0 -0
  42. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/main_window.py +0 -0
  43. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/metadata_management_widget.py +0 -0
  44. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/motion_tracking.py +0 -0
  45. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/overlay_export.py +0 -0
  46. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/plot_integration.py +0 -0
  47. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/qt_helpers.py +0 -0
  48. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/review_widget.py +0 -0
  49. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/segmentation_tracking_widget.py +0 -0
  50. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/tab_tutorial_dialog.py +0 -0
  51. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/timeline_themes.py +0 -0
  52. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/training_profiles.py +0 -0
  53. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/training_widget.py +0 -0
  54. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/gui/video_utils.py +0 -0
  55. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/licenses/SAM2-LICENSE +0 -0
  56. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab/licenses/VideoPrism-LICENSE +0 -0
  57. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab.egg-info/dependency_links.txt +0 -0
  58. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab.egg-info/entry_points.txt +0 -0
  59. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab.egg-info/requires.txt +0 -0
  60. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/singlebehaviorlab.egg-info/top_level.txt +0 -0
  61. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_clustering_smoke.py +0 -0
  62. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_config.py +0 -0
  63. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_motion_tracking.py +0 -0
  64. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_paths.py +0 -0
  65. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_sam2_smoke.py +0 -0
  66. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/tests/test_segments.py +0 -0
  67. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/__init__.py +0 -0
  68. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/automatic_mask_generator.py +0 -0
  69. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/benchmark.py +0 -0
  70. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/build_sam.py +0 -0
  71. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2/sam2_hiera_b+.yaml +0 -0
  72. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2/sam2_hiera_l.yaml +0 -0
  73. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2/sam2_hiera_s.yaml +0 -0
  74. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2/sam2_hiera_t.yaml +0 -0
  75. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2.1/sam2.1_hiera_b+.yaml +0 -0
  76. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2.1/sam2.1_hiera_l.yaml +0 -0
  77. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2.1/sam2.1_hiera_s.yaml +0 -0
  78. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2.1/sam2.1_hiera_t.yaml +0 -0
  79. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/configs/sam2.1_training/sam2.1_hiera_b+_MOSE_finetune.yaml +0 -0
  80. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/__init__.py +0 -0
  81. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/backbones/__init__.py +0 -0
  82. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/backbones/hieradet.py +0 -0
  83. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/backbones/image_encoder.py +0 -0
  84. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/backbones/utils.py +0 -0
  85. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/memory_attention.py +0 -0
  86. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/memory_encoder.py +0 -0
  87. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/position_encoding.py +0 -0
  88. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam/__init__.py +0 -0
  89. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam/mask_decoder.py +0 -0
  90. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam/prompt_encoder.py +0 -0
  91. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam/transformer.py +0 -0
  92. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam2_base.py +0 -0
  93. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/modeling/sam2_utils.py +0 -0
  94. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_hiera_b+.yaml +0 -0
  95. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_hiera_l.yaml +0 -0
  96. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_hiera_s.yaml +0 -0
  97. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_hiera_t.yaml +0 -0
  98. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_image_predictor.py +0 -0
  99. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_video_predictor.py +0 -0
  100. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/sam2_video_predictor_legacy.py +0 -0
  101. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/utils/__init__.py +0 -0
  102. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/utils/amg.py +0 -0
  103. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/utils/misc.py +0 -0
  104. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/sam2_backend/sam2/utils/transforms.py +0 -0
  105. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/__init__.py +0 -0
  106. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/encoders.py +0 -0
  107. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/layers.py +0 -0
  108. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/models.py +0 -0
  109. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/tokenizers.py +0 -0
  110. {singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2}/third_party/videoprism_backend/videoprism/utils.py +0 -0
{singlebehaviorlab-2.3.1/singlebehaviorlab.egg-info → singlebehaviorlab-2.3.2}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: singlebehaviorlab
- Version: 2.3.1
+ Version: 2.3.2
  Summary: Semi-automated behavioral video annotation, training, and analysis tool
  Author: Almir Aljovic
  Maintainer: Almir Aljovic
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
  
  [project]
  name = "singlebehaviorlab"
- version = "2.3.1"
+ version = "2.3.2"
  description = "Semi-automated behavioral video annotation, training, and analysis tool"
  readme = "README.md"
  license = { file = "LICENSE" }
singlebehaviorlab/__init__.py

@@ -19,7 +19,7 @@ or videoprism. Each symbol triggers its underlying backend module only on
  first access.
  """
  
- __version__ = "2.3.1"
+ __version__ = "2.3.2"
  __author__ = "Almir Aljovic"
  
  # Mapping of public name → (backend module, attribute name).
@@ -35,6 +35,7 @@ _PUBLIC_API = {
      "infer": ("singlebehaviorlab.backend.inference", "run_inference_on_video"),
      "train": ("singlebehaviorlab.backend.training_runner", "run_training_session"),
      "load_config": ("singlebehaviorlab.config", "load_config"),
+     "learn_behavior_features": ("singlebehaviorlab.backend.contrastive", "learn_behavior_features"),
      "load_demo": ("singlebehaviorlab.demo", "load_demo"),
      "DEMOS": ("singlebehaviorlab.demo", "DEMOS"),
  }
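
Usage note: the new symbol rides the package's lazy public API, so the torch-backed module is imported only on first attribute access. A minimal calling sketch (the file paths are placeholders, not from this release):

    import singlebehaviorlab as sbl

    # First access triggers the lazy import of
    # singlebehaviorlab.backend.contrastive.
    paths = sbl.learn_behavior_features(
        "experiment/behaviorome_matrix.npz",    # placeholder input
        "experiment/behavior_proj_matrix.npz",  # placeholder output
        metadata_path="experiment/behaviorome_metadata.npz",
        log_fn=print,
    )
    print(paths["matrix"], paths["metadata"])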
singlebehaviorlab/backend/clustering.py

@@ -28,7 +28,8 @@ class ClusteringParams:
      n_components: int = 2
      n_neighbors: int = 15
      min_dist: float = 0.1
-     normalization: str = "standard"  # standard | minmax | l2 | none
+     normalization: str = "standard"
+     subtract_video_mean: bool = False
      leiden_resolution: float = 1.0
      leiden_k: int = 15
      min_cluster_size: int = 10
@@ -157,8 +158,29 @@ def run_clustering(
      matrix_df, metadata_df = _load_matrix_metadata(matrix_path_str, metadata_path_str)
      _log(f"Matrix shape: {matrix_df.shape[0]} features × {matrix_df.shape[1]} samples")
  
-     processed = _normalize(matrix_df.T, params.normalization)
-     _log(f"Processed shape: {processed.shape} (samples × features); normalization={params.normalization}")
+     X = matrix_df.T
+     X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)
+
+     if params.subtract_video_mean and metadata_df is not None:
+         group_col = None
+         for col in ("group", "video_id"):
+             if col in metadata_df.columns:
+                 group_col = col
+                 break
+         snippet_col = "snippet" if "snippet" in metadata_df.columns else None
+         if group_col and snippet_col:
+             for grp in metadata_df[group_col].unique():
+                 grp_snippets = metadata_df.loc[metadata_df[group_col] == grp, snippet_col].values
+                 mask = X.index.isin(grp_snippets)
+                 if mask.sum() > 1:
+                     X.loc[mask] -= X.loc[mask].mean(axis=0)
+             _log("Applied per-video mean subtraction")
+
+     processed = _normalize(X, params.normalization)
+
+     _log(f"Processed shape: {processed.shape} (samples × features)")
  
      _log(
          f"Running UMAP (n_neighbors={params.n_neighbors}, "
singlebehaviorlab-2.3.2/singlebehaviorlab/backend/contrastive.py (new file)

@@ -0,0 +1,202 @@
+ """Temporal contrastive projection for behavior-focused embeddings.
+
+ Trains a lightweight MLP on pre-computed VideoPrism embeddings using
+ temporal proximity as the supervision signal: clips close in time within
+ the same video should map nearby; clips far apart should map far away.
+ The projected embeddings suppress static visual factors (lighting,
+ background, camera) and amplify behavioral dynamics.
+ """
+
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import Any, Callable, Optional
+
+ import numpy as np
+ import pandas as pd
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ __all__ = ["learn_behavior_features"]
+
+ _DEFAULT_DIM = 128
+ _DEFAULT_EPOCHS = 30
+ _DEFAULT_LR = 3e-4
+ _POSITIVE_WINDOW = 5
+ _TEMPERATURE = 0.07
+
+
+ class _ProjectionHead(nn.Module):
+     def __init__(self, in_dim: int, out_dim: int):
+         super().__init__()
+         hidden = max(out_dim, in_dim // 2)
+         self.net = nn.Sequential(
+             nn.Linear(in_dim, hidden),
+             nn.ReLU(),
+             nn.Linear(hidden, out_dim),
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         return F.normalize(self.net(x), dim=-1)
+
+
+ def _build_pairs(
+     metadata: pd.DataFrame,
+     n_samples: int,
+     positive_window: int,
+     rng: np.random.Generator,
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+     group_col = None
+     for col in ("group", "video_id"):
+         if col in metadata.columns:
+             group_col = col
+             break
+     snippet_col = "snippet" if "snippet" in metadata.columns else None
+     if not group_col or not snippet_col:
+         # Fallback: treat the whole dataset as one sequence. Sizes follow
+         # len(anchors) so shapes stay consistent when n_samples > len(metadata).
+         indices = np.arange(len(metadata))
+         rng.shuffle(indices)
+         anchors = indices[:n_samples]
+         positives = np.clip(
+             anchors + rng.integers(-positive_window, positive_window + 1, size=len(anchors)),
+             0, len(metadata) - 1,
+         )
+         negatives = rng.integers(0, len(metadata), size=len(anchors))
+         return anchors, positives, negatives
+
+     groups = metadata[group_col].values
+     unique_groups = np.unique(groups)
+     group_indices: dict[Any, np.ndarray] = {}
+     for g in unique_groups:
+         group_indices[g] = np.where(groups == g)[0]
+
+     anchors = []
+     positives = []
+     negatives = []
+     per_group = max(1, n_samples // len(unique_groups))
+
+     for g in unique_groups:
+         idx = group_indices[g]
+         if len(idx) < 2:
+             continue
+         a = rng.choice(idx, size=min(per_group, len(idx)), replace=len(idx) < per_group)
+         for ai in a:
+             pos_in_group = np.where(idx == ai)[0][0]
+             lo = max(0, pos_in_group - positive_window)
+             hi = min(len(idx), pos_in_group + positive_window + 1)
+             candidates = idx[lo:hi]
+             candidates = candidates[candidates != ai]
+             if len(candidates) == 0:
+                 continue
+             pi = rng.choice(candidates)
+
+             other_groups = [og for og in unique_groups if og != g]
+             if other_groups:
+                 ng = rng.choice(other_groups)
+                 ni = rng.choice(group_indices[ng])
+             else:
+                 far_lo = max(0, pos_in_group - 3 * positive_window)
+                 far_hi = min(len(idx), pos_in_group + 3 * positive_window + 1)
+                 far_candidates = np.setdiff1d(idx, idx[far_lo:far_hi])
+                 if len(far_candidates) == 0:
+                     far_candidates = idx
+                 ni = rng.choice(far_candidates)
+
+             anchors.append(ai)
+             positives.append(pi)
+             negatives.append(ni)
+
+     return np.array(anchors), np.array(positives), np.array(negatives)
+
+
+ def _info_nce_loss(anchor, positive, negative, temperature):
+     pos_sim = (anchor * positive).sum(dim=-1) / temperature
+     neg_sim = (anchor * negative).sum(dim=-1) / temperature
+     logits = torch.stack([pos_sim, neg_sim], dim=-1)
+     labels = torch.zeros(len(anchor), dtype=torch.long, device=anchor.device)
+     return F.cross_entropy(logits, labels)
+
+
+ def learn_behavior_features(
+     matrix_path: str | os.PathLike[str],
+     output_path: str | os.PathLike[str],
+     *,
+     metadata_path: Optional[str | os.PathLike[str]] = None,
+     out_dim: int = _DEFAULT_DIM,
+     epochs: int = _DEFAULT_EPOCHS,
+     lr: float = _DEFAULT_LR,
+     positive_window: int = _POSITIVE_WINDOW,
+     temperature: float = _TEMPERATURE,
+     log_fn: Optional[Callable[[str], None]] = None,
+ ) -> dict[str, str]:
+     """Train a contrastive projection and write the projected embedding matrix.
+
+     Returns dict with ``matrix`` and ``metadata`` output paths.
+     """
+     from singlebehaviorlab.backend.clustering import _load_matrix_metadata
+
+     matrix_path = str(Path(matrix_path).expanduser().resolve())
+     output_path_obj = Path(output_path).expanduser().resolve()
+     output_path_obj.parent.mkdir(parents=True, exist_ok=True)
+
+     metadata_path_str = str(Path(metadata_path).expanduser().resolve()) if metadata_path else None
+
+     def _log(msg: str) -> None:
+         if log_fn:
+             log_fn(msg)
+
+     matrix_df, metadata_df = _load_matrix_metadata(matrix_path, metadata_path_str)
+     X = matrix_df.T
+     embeddings = X.values.astype(np.float32)
+     n_samples, in_dim = embeddings.shape
+     _log(f"Loaded {n_samples} embeddings ({in_dim}-dim)")
+
+     if metadata_df is None:
+         metadata_df = pd.DataFrame({"snippet": X.index, "group": "video_0"})
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = _ProjectionHead(in_dim, out_dim).to(device)
+     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+     all_emb = torch.from_numpy(embeddings).to(device)
+
+     rng = np.random.default_rng(42)
+     pairs_per_epoch = max(1024, min(n_samples * 4, 65536))
+
+     _log(f"Training projection head ({in_dim} → {out_dim}) for {epochs} epochs on {device}")
+     for epoch in range(epochs):
+         anchors, positives, negatives = _build_pairs(metadata_df, pairs_per_epoch, positive_window, rng)
+         if len(anchors) == 0:
+             _log("No valid pairs found — check metadata has group/video_id column")
+             break
+         a_emb = model(all_emb[anchors])
+         p_emb = model(all_emb[positives])
+         n_emb = model(all_emb[negatives])
+         loss = _info_nce_loss(a_emb, p_emb, n_emb, temperature)
+         optimizer.zero_grad()
+         loss.backward()
+         optimizer.step()
+         if (epoch + 1) % 10 == 0 or epoch == 0:
+             _log(f"  epoch {epoch + 1}/{epochs}  loss={loss.item():.4f}")
+
+     model.eval()
+     with torch.no_grad():
+         projected = model(all_emb).cpu().numpy()
+     _log(f"Projected embeddings: {projected.shape}")
+
+     snippet_ids = np.array(X.index.tolist())
+     feature_names = np.array([f"behavior_feat_{i}" for i in range(out_dim)])
+
+     out_matrix = str(output_path_obj)
+     if out_matrix.endswith("_matrix.npz"):
+         out_metadata = out_matrix.replace("_matrix.npz", "_metadata.npz")
+     elif out_matrix.endswith(".npz"):
+         out_metadata = out_matrix[:-4] + "_metadata.npz"
+     else:
+         out_metadata = out_matrix + "_metadata.npz"
+
+     np.savez_compressed(out_matrix, matrix=projected.T, feature_names=feature_names, snippet_ids=snippet_ids)
+     _log(f"Wrote projected matrix: {out_matrix}")
+
+     if metadata_df is not None:
+         np.savez_compressed(out_metadata, metadata=metadata_df.values, columns=np.array(metadata_df.columns))
+         _log(f"Wrote metadata: {out_metadata}")
+
+     return {"matrix": out_matrix, "metadata": out_metadata}
singlebehaviorlab/backend/registration.py

@@ -40,6 +40,7 @@ class RegistrationParams:
      clip_length_frames: int = 16
      step_frames: Optional[int] = None
      backbone_model: str = "videoprism_public_v1_base"
+     flip_invariant: bool = False
      experiment_name: Optional[str] = None
  
      @property
@@ -71,6 +72,7 @@ def _extract_embedding(
      backbone: VideoPrismBackbone,
      frames: np.ndarray,
      target_size: int,
+     flip_invariant: bool = False,
  ) -> Optional[np.ndarray]:
      try:
          resized = np.array([cv2.resize(f, (target_size, target_size)) for f in frames])
@@ -79,6 +81,12 @@
          with torch.no_grad():
              tokens = backbone(tensor)
              embedding = tokens.mean(dim=1).squeeze(0).cpu().numpy()
+             if flip_invariant:
+                 embs = [embedding]
+                 for dims in [[-1], [-2], [-1, -2]]:
+                     t_flip = torch.flip(tensor, dims=dims)
+                     embs.append(backbone(t_flip).mean(dim=1).squeeze(0).cpu().numpy())
+                 embedding = np.mean(embs, axis=0)
          return embedding.astype(np.float32)
      except Exception:
          return None
@@ -178,7 +186,7 @@ def run_registration(
          if frames is None or len(frames) == 0:
              _log(f"Skipping {os.path.basename(clip_path)}: no frames")
              continue
-         embedding = _extract_embedding(backbone, frames, params.target_size)
+         embedding = _extract_embedding(backbone, frames, params.target_size, params.flip_invariant)
          del frames
          if embedding is None:
              _log(f"Skipping {os.path.basename(clip_path)}: embedding failed")
singlebehaviorlab/cli.py

@@ -178,6 +178,10 @@ def _build_parser() -> argparse.ArgumentParser:
          "--no-clahe", dest="clahe", action="store_false", default=None,
          help="Disable CLAHE contrast normalization.",
      )
+     register_parser.add_argument(
+         "--flip-invariant", action="store_true",
+         help="Average embeddings over 4 orientations (original, h-flip, v-flip, both) to remove facing-direction bias. 4x extraction time.",
+     )
      _add_common_runtime_flags(register_parser)
  
      segment_parser = subparsers.add_parser(
@@ -347,6 +351,8 @@ def cmd_register(args: argparse.Namespace) -> int:
          params.target_fps = int(args.target_fps)
      if args.clahe is False:
          params.normalization_method = "None"
+     if args.flip_invariant:
+         params.flip_invariant = True
  
      bar = {"pbar": None}
  
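Invocation sketch, assuming the module entry point (the package ships a __main__.py) forwards to this parser; the remaining register arguments don't appear in this diff and are left elided:

    python -m singlebehaviorlab register ... --flip-invariant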
singlebehaviorlab/gui/clustering_widget.py

@@ -27,7 +27,7 @@ from PyQt6.QtWidgets import (
      QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel,
      QComboBox, QSlider, QCheckBox, QGroupBox, QScrollArea, QSplitter,
      QMessageBox, QListWidget, QTextEdit, QFileDialog, QProgressBar, QDialog,
-     QSizePolicy, QDialogButtonBox
+     QSizePolicy, QDialogButtonBox, QApplication
  )
  from PyQt6.QtCore import Qt, QThread, pyqtSignal
  from PyQt6.QtGui import QFont
@@ -234,11 +234,29 @@ class ClusteringWidget(QWidget):
          )
          norm_row.addWidget(self.normalization_method)
          preprocess_layout.addLayout(norm_row)
  
+         self.subtract_video_mean_check = QCheckBox("Subtract per-video mean")
+         self.subtract_video_mean_check.setToolTip(
+             "Remove the average embedding of each video/group before clustering.\n"
+             "Reduces sensitivity to camera setup, lighting, and background\n"
+             "while preserving within-video behavior differences."
+         )
+         preprocess_layout.addWidget(self.subtract_video_mean_check)
+
+         self.learn_features_btn = QPushButton("Learn behavior features")
+         self.learn_features_btn.setToolTip(
+             "Train a contrastive projection on the loaded embeddings.\n"
+             "Clips close in time map nearby; clips far apart map far away.\n"
+             "Suppresses static visual factors and amplifies behavioral dynamics.\n"
+             "Replaces the current matrix with 128-dim projected embeddings."
+         )
+         self.learn_features_btn.clicked.connect(self._learn_behavior_features)
+         preprocess_layout.addWidget(self.learn_features_btn)
+
          self.preprocess_btn = QPushButton("Apply preprocessing")
          self.preprocess_btn.clicked.connect(self.apply_preprocessing)
          preprocess_layout.addWidget(self.preprocess_btn)
  
          self.preprocess_status = QLabel("Ready")
          self.preprocess_status.setWordWrap(True)
          self.preprocess_status.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Minimum)
@@ -742,8 +760,68 @@
          except Exception as e:
              QMessageBox.critical(self, "Load Error", f"Failed to load data: {e}")
  
+     def _learn_behavior_features(self):
+         if self.matrix_data is None:
+             QMessageBox.warning(self, "No data", "Load a feature matrix first.")
+             return
+         try:
+             import tempfile
+             from singlebehaviorlab.backend.contrastive import learn_behavior_features
+
+             self.preprocess_status.setText("Training contrastive projection...")
+             self.preprocess_status.setStyleSheet("color: blue;")
+             QApplication.processEvents()
+
+             with tempfile.NamedTemporaryFile(suffix="_matrix.npz", delete=False) as tmp_m:
+                 tmp_matrix = tmp_m.name
+             tmp_metadata = tmp_matrix.replace("_matrix.npz", "_metadata.npz")
+             out_matrix = tmp_matrix.replace("_matrix.npz", "_proj_matrix.npz")
+
+             snippet_ids = np.array(self.matrix_data.columns.tolist())
+             feature_names = np.array(self.matrix_data.index.tolist())
+             np.savez_compressed(tmp_matrix, matrix=self.matrix_data.values, feature_names=feature_names, snippet_ids=snippet_ids)
+             if self.metadata is not None:
+                 np.savez_compressed(tmp_metadata, metadata=self.metadata.values, columns=np.array(self.metadata.columns))
+             else:
+                 tmp_metadata = None
+
+             log_lines = []
+             result = learn_behavior_features(
+                 tmp_matrix,
+                 out_matrix,
+                 metadata_path=tmp_metadata,
+                 log_fn=lambda msg: (log_lines.append(msg), self.preprocess_status.setText(msg), QApplication.processEvents()),
+             )
+
+             proj = np.load(result["matrix"], allow_pickle=True)
+             self.matrix_data = pd.DataFrame(proj["matrix"], index=proj["feature_names"], columns=proj["snippet_ids"])
+
+             for f in [tmp_matrix, tmp_metadata, out_matrix, result.get("metadata")]:
+                 if f and os.path.exists(f):
+                     try:
+                         os.unlink(f)
+                     except Exception:
+                         pass
+
+             X = self.matrix_data.T
+             X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)
+             from sklearn.preprocessing import StandardScaler
+             X_norm = StandardScaler().fit_transform(X)
+             self.processed_data = pd.DataFrame(X_norm, index=X.index, columns=range(X_norm.shape[1]))
+
+             n = self.matrix_data.shape[1]
+             self.preprocess_status.setText(
+                 f"Behavior features: {n} clips → 128-dim (contrastive) → standardized. Ready to cluster."
+             )
+             self.preprocess_status.setStyleSheet("color: green;")
+             self.run_btn.setEnabled(True)
+
+         except Exception as e:
+             self.preprocess_status.setText(f"Feature learning failed: {e}")
+             self.preprocess_status.setStyleSheet("color: red;")
+             QMessageBox.critical(self, "Error", f"Contrastive training failed:\n{e}")
+
      def apply_preprocessing(self):
-         """Apply normalization."""
          if self.matrix_data is None:
              return
  
@@ -759,28 +837,44 @@
          # sklearn expects Samples as Rows. So we transpose.
  
          X = data.T
-
-         # Clean infinite/NaN
-         X = X.replace([np.inf, -np.inf], np.nan)
-
-         # Normalize
+         X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)
+
+         steps = []
+
+         if self.subtract_video_mean_check.isChecked() and self.metadata is not None:
+             group_col = None
+             for col in ("group", "video_id"):
+                 if col in self.metadata.columns:
+                     group_col = col
+                     break
+             if group_col is not None:
+                 snippet_col = "snippet" if "snippet" in self.metadata.columns else None
+                 if snippet_col:
+                     for grp in self.metadata[group_col].unique():
+                         grp_snippets = self.metadata.loc[
+                             self.metadata[group_col] == grp, snippet_col
+                         ].values
+                         mask = X.index.isin(grp_snippets)
+                         if mask.sum() > 1:
+                             X.loc[mask] -= X.loc[mask].mean(axis=0)
+                     steps.append("video-mean-sub")
+
          norm_method = self.normalization_method.currentText()
          if norm_method == 'standard':
-             scaler = StandardScaler()
-             X_norm = scaler.fit_transform(X)
+             X_norm = StandardScaler().fit_transform(X)
          elif norm_method == 'minmax':
-             scaler = MinMaxScaler()
-             X_norm = scaler.fit_transform(X)
+             X_norm = MinMaxScaler().fit_transform(X)
          elif norm_method == 'l2':
-             scaler = Normalizer(norm='l2')
-             X_norm = scaler.fit_transform(X)
+             X_norm = Normalizer(norm='l2').fit_transform(X)
          else:
-             X_norm = X
-
-         # Store processed data (Samples x Features)
-         self.processed_data = pd.DataFrame(X_norm, index=X.index, columns=X.columns)
-
-         self.preprocess_status.setText(f"Normalized: {norm_method}")
+             X_norm = X.values if hasattr(X, 'values') else X
+         if norm_method != 'none':
+             steps.append(norm_method)
+
+         self.processed_data = pd.DataFrame(X_norm, index=X.index, columns=range(X_norm.shape[1]))
+
+         self.preprocess_status.setText(f"Preprocessed: {' → '.join(steps) or 'none'}")
          self.preprocess_status.setStyleSheet("color: green;")
  
      except Exception as e:
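
Stripped of the Qt plumbing, the widget's preprocessing path above reduces to the following standalone sketch (clean, optionally center per video, then standardize; the column names mirror the code above and are otherwise assumptions):

    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    def preprocess(data, metadata=None, subtract_video_mean=False):
        # data is features × samples; transpose so sklearn sees samples × features.
        X = data.T.replace([np.inf, -np.inf], np.nan).fillna(0.0)
        if (subtract_video_mean and metadata is not None
                and {"group", "snippet"} <= set(metadata.columns)):
            for grp in metadata["group"].unique():
                mask = X.index.isin(metadata.loc[metadata["group"] == grp, "snippet"])
                if mask.sum() > 1:
                    X.loc[mask] -= X.loc[mask].mean(axis=0)
        return pd.DataFrame(StandardScaler().fit_transform(X), index=X.index)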
singlebehaviorlab/gui/registration_widget.py

@@ -326,15 +326,18 @@ class EmbeddingExtractionWorker(QThread):
      error = pyqtSignal(str)
      log_message = pyqtSignal(str)
  
-     def __init__(self, clip_paths: list, output_dir: str, experiment_name: str = None, model_name: str = 'videoprism_public_v1_base', clip_frame_ranges: dict = None, append_to_existing: bool = False):
+     def __init__(self, clip_paths: list, output_dir: str, experiment_name: str = None, model_name: str = 'videoprism_public_v1_base', clip_frame_ranges: dict = None, append_to_existing: bool = False, flip_invariant: bool = False, align_orientation: bool = False, mask_path: str = None):
          super().__init__()
-         self.clip_paths = clip_paths  # List of clip paths (strings)
-         self.clip_frame_ranges = clip_frame_ranges or {}  # Dict mapping clip_path -> (start_frame, end_frame)
+         self.clip_paths = clip_paths
+         self.clip_frame_ranges = clip_frame_ranges or {}
          self.output_dir = output_dir
          self.experiment_name = experiment_name
          self.model_name = model_name
          self.should_stop = False
          self.append_to_existing = append_to_existing
+         self.flip_invariant = flip_invariant
+         self.align_orientation = align_orientation
+         self.mask_path = mask_path
  
      def stop(self):
          self.should_stop = True
@@ -352,6 +355,8 @@
  
          embed_dim = backbone.get_embed_dim()
          self.log_message.emit(f"VideoPrism model loaded. Embedding dimension: {embed_dim}")
+         if self.flip_invariant:
+             self.log_message.emit("Flip-invariant mode: averaging 4 orientations (original, hflip, vflip, both)")
  
          feature_matrix = []
          metadata = []
@@ -372,10 +377,7 @@
                  self.log_message.emit(f"Warning: Could not load frames from {clip_name}, skipping")
                  continue
  
-             # Extract embedding
              embedding = self._extract_embedding(backbone, frames)
-
-             # Free frames memory immediately after use
              del frames
  
              if embedding is None:
@@ -539,25 +541,7 @@
                  self.log_message.emit(f"NPZ save failed (metadata): {e}")
                  npz_metadata_path = None
  
-             # Also save Parquet as backup (faster than CSV, still readable)
-             try:
-                 matrix_df = pd.DataFrame(feature_matrix.T, index=feature_names, columns=snippet_ids)
-                 parquet_matrix_path = os.path.join(self.output_dir, f'{base_name}_matrix.parquet')
-                 matrix_df.to_parquet(parquet_matrix_path, index=True)
-                 self.log_message.emit(f"Saved feature matrix (Parquet) to {parquet_matrix_path}")
-             except Exception as e:
-                 self.log_message.emit(f"Parquet save failed (matrix): {e}")
-
-             try:
-                 parquet_metadata_path = os.path.join(self.output_dir, f'{base_name}_metadata.parquet')
-                 metadata_df.to_parquet(parquet_metadata_path, index=False)
-                 self.log_message.emit(f"Saved metadata (Parquet) to {parquet_metadata_path}")
-             except Exception as e:
-                 self.log_message.emit(f"Parquet save failed (metadata): {e}")
-
-             # Emit NPZ paths (primary format)
-             self.finished.emit(npz_matrix_path if npz_matrix_path else parquet_matrix_path,
-                                npz_metadata_path if npz_metadata_path else parquet_metadata_path)
+             self.finished.emit(npz_matrix_path, npz_metadata_path)
  
          except Exception as e:
              import traceback
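
With the Parquet fallback removed, NPZ is the single persisted format. Reading a matrix back follows the key layout used by the np.savez_compressed calls elsewhere in this release (the path is a placeholder):

    import numpy as np
    import pandas as pd

    npz = np.load("experiment/behaviorome_matrix.npz", allow_pickle=True)
    matrix_df = pd.DataFrame(
        npz["matrix"],                # features × samples
        index=npz["feature_names"],
        columns=npz["snippet_ids"],
    )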
@@ -584,38 +568,36 @@
          return np.array(frames) if frames else None
  
      def _extract_embedding(self, backbone: VideoPrismBackbone, frames: np.ndarray) -> np.ndarray:
-         """Extract mean-pooled VideoPrism embedding from frames."""
          try:
-             # Resize frames to 288x288 (VideoPrism expects this)
              target_size = 288
              processed_frames = []
              for frame in frames:
                  resized = cv2.resize(frame, (target_size, target_size))
                  processed_frames.append(resized)
              frames_resized = np.array(processed_frames)
-             del processed_frames  # Free list memory
-
-             # Convert to PyTorch format: (T, C, H, W) and normalize to [0, 1]
-             frames_t = np.transpose(frames_resized, (0, 3, 1, 2))  # (T, C, H, W)
-             del frames_resized  # Free numpy array
-
+             del processed_frames
+             frames_t = np.transpose(frames_resized, (0, 3, 1, 2))
+             del frames_resized
              frames_tensor = torch.from_numpy(frames_t).float() / 255.0
-             del frames_t  # Free numpy array
-
-             # Add batch dimension: (1, T, C, H, W)
+             del frames_t
              frames_tensor = frames_tensor.unsqueeze(0)
  
              with torch.no_grad():
-                 # VideoPrism returns (B, T*N, D) where N = 16*16 = 256
-                 tokens = backbone(frames_tensor)  # (1, T*256, D)
-                 del frames_tensor  # Free input tensor immediately
-
-                 # Mean pool over all tokens to get single embedding vector
-                 embedding = tokens.mean(dim=1).squeeze(0)  # (D,)
-                 del tokens  # Free large token tensor
-
+                 tokens = backbone(frames_tensor)
+                 embedding = tokens.mean(dim=1).squeeze(0)
+                 del tokens
+                 if self.flip_invariant:
+                     embs = [embedding.cpu().numpy()]
+                     for dims in [[-1], [-2], [-1, -2]]:
+                         t_flip = torch.flip(frames_tensor, dims=dims)
+                         embs.append(backbone(t_flip).mean(dim=1).squeeze(0).cpu().numpy())
+                         del t_flip
+                     embedding = torch.from_numpy(np.mean(embs, axis=0))
+                     del embs
+                 del frames_tensor
  
              result = embedding.cpu().numpy()
-             del embedding  # Free GPU tensor
+             del embedding
              return result
  
          except Exception as e:
@@ -821,6 +803,16 @@
          self.output_dir_label = QLabel("Clips will be saved to experiment folder")
          output_layout.addWidget(self.output_dir_label)
  
+         self.flip_invariant_check = QCheckBox("Flip-invariant embeddings")
+         self.flip_invariant_check.setChecked(False)
+         self.flip_invariant_check.setToolTip(
+             "Run each clip through VideoPrism in 4 orientations (original, hflip,\n"
+             "vflip, both) and average the embeddings. Removes sensitivity to the\n"
+             "animal's facing direction and vertical orientation. 4x extraction time."
+         )
+         output_layout.addWidget(self.flip_invariant_check)
+
          self.append_embeddings_check = QCheckBox("Append to existing embeddings if present")
          self.append_embeddings_check.setChecked(False)
          self.append_embeddings_check.setToolTip("When enabled, if an existing behaviorome matrix/metadata is found in the experiment, new embeddings will be appended instead of creating a new file.")
@@ -1090,7 +1082,6 @@
          self.log_text.append(f"Output directory: {self.output_dir}")
          self.log_text.append(f"Created {len(output_paths)} clip(s)")
  
-         # Extract clip paths and frame ranges from tuples
          clip_paths_list = []
          self.clip_frame_ranges = {}
          for item in output_paths:
@@ -1099,8 +1090,8 @@
                  clip_paths_list.append(clip_path)
                  self.clip_frame_ranges[clip_path] = (start_frame, end_frame)
              else:
-                 # Legacy: just a path string
                  clip_paths_list.append(item)
+
  
          # Group clips by video (using extracted paths)
          clips_by_video = {}
@@ -1265,13 +1256,21 @@
          experiment_name = self.config.get("experiment_name", None)
  
          # Start extraction worker with frame ranges if available
+         mask_path = None
+         if self.align_orientation_check.isChecked() and self.video_mask_pairs:
+             mask_path = self.video_mask_pairs[0][1] if len(self.video_mask_pairs) > 0 else None
+             self.log_text.append(f"Align orientation: mask_path={mask_path}, pairs={len(self.video_mask_pairs)}, frame_ranges={len(self.clip_frame_ranges)}")
+
          self.embedding_worker = EmbeddingExtractionWorker(
              clip_paths,
              self.output_dir,
              experiment_name=experiment_name,
              model_name=model_name,
              clip_frame_ranges=self.clip_frame_ranges if hasattr(self, 'clip_frame_ranges') else None,
-             append_to_existing=self.append_embeddings_check.isChecked()
+             append_to_existing=self.append_embeddings_check.isChecked(),
+             flip_invariant=self.flip_invariant_check.isChecked(),
+             align_orientation=self.align_orientation_check.isChecked(),
+             mask_path=mask_path,
          )
          self.embedding_worker.progress.connect(self._on_embedding_progress)
          self.embedding_worker.finished.connect(self._on_embedding_finished)
{singlebehaviorlab-2.3.1 → singlebehaviorlab-2.3.2/singlebehaviorlab.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: singlebehaviorlab
- Version: 2.3.1
+ Version: 2.3.2
  Summary: Semi-automated behavioral video annotation, training, and analysis tool
  Author: Almir Aljovic
  Maintainer: Almir Aljovic
singlebehaviorlab.egg-info/SOURCES.txt

@@ -16,6 +16,7 @@ singlebehaviorlab.egg-info/top_level.txt
  singlebehaviorlab/backend/__init__.py
  singlebehaviorlab/backend/augmentations.py
  singlebehaviorlab/backend/clustering.py
+ singlebehaviorlab/backend/contrastive.py
  singlebehaviorlab/backend/data_store.py
  singlebehaviorlab/backend/inference.py
  singlebehaviorlab/backend/model.py