PyPI - endoreg-db - Versions diffs - 0.8.3.1__py3-none-any.whl → 0.8.3.3__py3-none-any.whl - Mend

endoreg-db 0.8.3.1__py3-none-any.whl → 0.8.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of endoreg-db might be problematic. Click here for more details.

Files changed (9) hide show

endoreg_db/management/commands/create_model_meta_from_huggingface.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""
+Django management command to create ModelMeta from Hugging Face model.
+"""
+from pathlib import Path
+from django.core.files.base import ContentFile
+from django.core.management.base import BaseCommand
+from huggingface_hub import hf_hub_download
+from endoreg_db.models import AiModel, LabelSet, ModelMeta
+class Command(BaseCommand):
+    help = "Create ModelMeta by downloading model from Hugging Face"
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--model_id",
+            type=str,
+            default="wg-lux/colo_segmentation_RegNetX800MF_base",
+            help="Hugging Face model ID",
+        )
+        parser.add_argument(
+            "--model_name",
+            type=str,
+            default="image_multilabel_classification_colonoscopy_default",
+            help="Name for the AI model",
+        )
+        parser.add_argument(
+            "--labelset_name",
+            type=str,
+            default="multilabel_classification_colonoscopy_default",
+            help="Name of the labelset",
+        )
+        parser.add_argument(
+            "--meta_version",
+            type=str,
+            default="1",
+            help="Version for the model meta",
+        )
+    def handle(self, *args, **options):
+        model_id = options["model_id"]
+        model_name = options["model_name"]
+        labelset_name = options["labelset_name"]
+        version = options["meta_version"]
+        self.stdout.write(f"Downloading model {model_id} from Hugging Face...")
+        try:
+            # Download the model weights
+            weights_path = hf_hub_download(
+                repo_id=model_id,
+                filename="colo_segmentation_RegNetX800MF_base.ckpt",
+                local_dir="/tmp",
+            )
+            self.stdout.write(f"Downloaded weights to: {weights_path}")
+            # Get or create AI model
+            ai_model, created = AiModel.objects.get_or_create(
+                name=model_name, defaults={"description": f"Model from {model_id}"}
+            )
+            if created:
+                self.stdout.write(f"Created AI model: {ai_model.name}")
+            # Get labelset
+            try:
+                labelset = LabelSet.objects.get(name=labelset_name)
+            except LabelSet.DoesNotExist:
+                self.stdout.write(
+                    self.style.ERROR(f"LabelSet '{labelset_name}' not found")
+                )
+                return
+            # Create ModelMeta
+            model_meta, created = ModelMeta.objects.get_or_create(
+                name=model_name,
+                model=ai_model,
+                version=version,
+                defaults={
+                    "labelset": labelset,
+                    "activation": "sigmoid",
+                    "mean": "0.45211223,0.27139644,0.19264949",
+                    "std": "0.31418097,0.21088019,0.16059452",
+                    "size_x": 716,
+                    "size_y": 716,
+                    "axes": "2,0,1",
+                    "batchsize": 16,
+                    "num_workers": 0,
+                    "description": f"Downloaded from {model_id}",
+                },
+            )
+            # Save the weights file to the model
+            with open(weights_path, "rb") as f:
+                model_meta.weights.save(
+                    f"{model_name}_v{version}_colo_segmentation_RegNetX800MF_base.ckpt",
+                    ContentFile(f.read()),
+                )
+            # Set as active meta
+            ai_model.active_meta = model_meta
+            ai_model.save()
+            self.stdout.write(
+                self.style.SUCCESS(
+                    f"Successfully {'created' if created else 'updated'} ModelMeta: {model_meta}"
+                )
+            )
+        except Exception as e:
+            self.stdout.write(self.style.ERROR(f"Error creating ModelMeta: {e}"))
+            import traceback
+            traceback.print_exc()

endoreg_db/management/commands/setup_endoreg_db.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""
+Django management command to perform complete setup for EndoReg DB when used as an embedded app.
+This command ensures all necessary data and configurations are initialized.
+"""
+import os
+from pathlib import Path
+from django.core.management import call_command
+from django.core.management.base import BaseCommand
+class Command(BaseCommand):
+    help = """
+    Complete setup for EndoReg DB when used as an embedded app.
+    This command performs all necessary initialization steps:
+    1. Loads base database data
+    2. Sets up AI models and labels
+    3. Creates cache table
+    4. Initializes model metadata
+    """
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--skip-ai-setup",
+            action="store_true",
+            help="Skip AI model setup (for cases where AI features are not needed)",
+        )
+        parser.add_argument(
+            "--force-recreate",
+            action="store_true",
+            help="Force recreation of AI model metadata even if it exists",
+        )
+    def handle(self, *args, **options):
+        skip_ai = options.get("skip_ai_setup", False)
+        force_recreate = options.get("force_recreate", False)
+        self.stdout.write(self.style.SUCCESS("🚀 Starting EndoReg DB embedded app setup..."))
+        # Step 1: Load base database data
+        self.stdout.write("\n📊 Step 1: Loading base database data...")
+        try:
+            call_command("load_base_db_data")
+            self.stdout.write(self.style.SUCCESS("✅ Base database data loaded successfully"))
+        except Exception as e:
+            self.stdout.write(self.style.ERROR(f"❌ Failed to load base data: {e}"))
+            return
+            # Step 2: Create cache table (only if using database caching)
+        self.stdout.write("\n💾 Step 2: Setting up caching...")
+        from django.conf import settings
+        cache_backend = settings.CACHES.get("default", {}).get("BACKEND", "")
+        if "db" in cache_backend or "database" in cache_backend:
+            self.stdout.write("Using database caching - creating cache table...")
+            try:
+                call_command("createcachetable")
+                self.stdout.write(self.style.SUCCESS("✅ Cache table created successfully"))
+            except Exception as e:
+                self.stdout.write(self.style.ERROR(f"❌ Failed to create cache table: {e}"))
+                return
+        else:
+            self.stdout.write("Using in-memory caching - skipping cache table creation")
+        if skip_ai:
+            self.stdout.write(self.style.WARNING("\n⚠️  Skipping AI setup as requested"))
+        else:
+            # Step 3: Load AI model data
+            self.stdout.write("\n🤖 Step 3: Loading AI model data...")
+            try:
+                call_command("load_ai_model_data")
+                self.stdout.write(self.style.SUCCESS("✅ AI model data loaded successfully"))
+            except Exception as e:
+                self.stdout.write(self.style.ERROR(f"❌ Failed to load AI model data: {e}"))
+                return
+            # Step 4: Load AI model label data
+            self.stdout.write("\n🏷️  Step 4: Loading AI model label data...")
+            try:
+                call_command("load_ai_model_label_data")
+                self.stdout.write(self.style.SUCCESS("✅ AI model label data loaded successfully"))
+            except Exception as e:
+                self.stdout.write(self.style.ERROR(f"❌ Failed to load AI model label data: {e}"))
+                return
+            # Step 5: Create model metadata
+            self.stdout.write("\n📋 Step 5: Creating AI model metadata...")
+            try:
+                # Check if model metadata already exists
+                from endoreg_db.models import AiModel
+                default_model_name = "image_multilabel_classification_colonoscopy_default"
+                ai_model = AiModel.objects.filter(name=default_model_name).first()
+                if not ai_model:
+                    self.stdout.write(self.style.ERROR(f"❌ AI model '{default_model_name}' not found"))
+                    return
+                existing_meta = ai_model.metadata_versions.first()
+                if existing_meta and not force_recreate:
+                    self.stdout.write(self.style.SUCCESS("✅ Model metadata already exists (use --force-recreate to recreate)"))
+                else:
+                    # Try to create model metadata
+                    model_path = self._find_model_weights_file()
+                    if model_path:
+                        call_command(
+                            "create_multilabel_model_meta",
+                            model_name=default_model_name,
+                            model_meta_version=1,
+                            image_classification_labelset_name="multilabel_classification_colonoscopy_default",
+                            model_path=str(model_path),
+                        )
+                        self.stdout.write(self.style.SUCCESS("✅ AI model metadata created successfully"))
+                    else:
+                        self.stdout.write(self.style.WARNING("⚠️  Model weights file not found. AI features may not work properly."))
+            except Exception as e:
+                self.stdout.write(self.style.ERROR(f"❌ Failed to create AI model metadata: {e}"))
+                return
+        # Step 6: Verification
+        self.stdout.write("\n🔍 Step 6: Verifying setup...")
+        try:
+            self._verify_setup()
+            self.stdout.write(self.style.SUCCESS("✅ Setup verification completed successfully"))
+        except Exception as e:
+            self.stdout.write(self.style.ERROR(f"❌ Setup verification failed: {e}"))
+            return
+        self.stdout.write(self.style.SUCCESS("\n🎉 EndoReg DB embedded app setup completed successfully!"))
+        self.stdout.write("\nNext steps:")
+        self.stdout.write("1. Run migrations: python manage.py migrate")
+        self.stdout.write("2. Create superuser: python manage.py createsuperuser")
+        self.stdout.write("3. Start development server: python manage.py runserver")
+    def _find_model_weights_file(self):
+        """Find the model weights file in various possible locations."""
+        # Check common locations for model weights
+        possible_paths = [
+            # Test assets (for development)
+            Path("tests/assets/colo_segmentation_RegNetX800MF_6.ckpt"),
+            # Project root assets
+            Path("assets/colo_segmentation_RegNetX800MF_6.ckpt"),
+            # Storage directory
+            Path("data/storage/model_weights/colo_segmentation_RegNetX800MF_6.ckpt"),
+            # Absolute paths based on environment
+            Path(os.getenv("STORAGE_DIR", "storage")) / "model_weights" / "colo_segmentation_RegNetX800MF_6.ckpt",
+        ]
+        for path in possible_paths:
+            if path.exists():
+                self.stdout.write(f"Found model weights at: {path}")
+                return path
+        self.stdout.write("Model weights file not found in standard locations")
+        return None
+    def _verify_setup(self):
+        """Verify that the setup was successful."""
+        from django.conf import settings
+        from django.db import connection
+        # Check that required tables exist
+        required_tables = [
+            "endoreg_db_aimodel",
+            "endoreg_db_modelmeta",
+        ]
+        # Only check for cache table if using database caching
+        cache_backend = settings.CACHES.get("default", {}).get("BACKEND", "")
+        if "db" in cache_backend or "database" in cache_backend:
+            required_tables.append("django_cache_table")
+        cursor = connection.cursor()
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        existing_tables = [row[0] for row in cursor.fetchall()]
+        missing_tables = [table for table in required_tables if table not in existing_tables]
+        if missing_tables:
+            raise Exception(f"Missing required tables: {missing_tables}")
+        # Check that AI models exist (if AI setup was performed)
+        from endoreg_db.models import AiModel
+        if AiModel.objects.exists():
+            ai_model_count = AiModel.objects.count()
+            self.stdout.write(f"Found {ai_model_count} AI model(s)")
+            # Check for model metadata
+            from endoreg_db.models import ModelMeta
+            meta_count = ModelMeta.objects.count()
+            self.stdout.write(f"Found {meta_count} model metadata record(s)")
+        self.stdout.write("Setup verification passed")

endoreg_db/models/metadata/model_meta_logic.py CHANGED Viewed

@@ -1,21 +1,21 @@
 import shutil
+from logging import getLogger
 from pathlib import Path
-from typing import Optional, TYPE_CHECKING, Any, Type
-from huggingface_hub import hf_hub_download
+from typing import TYPE_CHECKING, Any, Optional, Type
 from django.db import transaction
+from huggingface_hub import hf_hub_download
 # Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
 # Adjust imports based on your project structure if necessary
 from ..administration.ai.ai_model import AiModel
 from ..label.label_set import LabelSet
-from ..utils import WEIGHTS_DIR, STORAGE_DIR
-from logging import getLogger
+from ..utils import STORAGE_DIR, WEIGHTS_DIR
 logger = getLogger("ai_model")
 if TYPE_CHECKING:
-    from .model_meta import ModelMeta # Import ModelMeta for type hinting
+    from .model_meta import ModelMeta  # Import ModelMeta for type hinting
 def get_latest_version_number_logic(
@@ -29,13 +29,13 @@ def get_latest_version_number_logic(
     """
     versions_qs = cls.objects.filter(
         name=meta_name, model__name=model_name
-    ).values_list('version', flat=True)
+    ).values_list("version", flat=True)
     max_v = 0
     found_numeric_version = False
     for v_str in versions_qs:
-        if v_str is None: # Skip None versions
+        if v_str is None:  # Skip None versions
             continue
         try:
             v_int = int(v_str)
@@ -47,13 +47,13 @@ def get_latest_version_number_logic(
                 f"Warning: Could not parse version string '{v_str}' as an integer for "
                 f"meta_name='{meta_name}', model_name='{model_name}' while determining the max version."
             )
     return max_v if found_numeric_version else 0
 @transaction.atomic
 def create_from_file_logic(
-    cls: Type["ModelMeta"], # cls is ModelMeta
+    cls: Type["ModelMeta"],  # cls is ModelMeta
     meta_name: str,
     model_name: str,
     labelset_name: str,
@@ -94,11 +94,14 @@ def create_from_file_logic(
             )
         elif existing and bump_if_exists:
             target_version = str(latest_version_num + 1)
-            logger.info(f"Bumping version for {meta_name}/{model_name} to {target_version}")
+            logger.info(
+                f"Bumping version for {meta_name}/{model_name} to {target_version}"
+            )
     else:
         target_version = str(latest_version_num + 1)
-        logger.info(f"Setting next version for {meta_name}/{model_name} to {target_version}")
+        logger.info(
+            f"Setting next version for {meta_name}/{model_name} to {target_version}"
+        )
     # --- Prepare Weights File ---
     source_weights_path = Path(weights_file).resolve()
@@ -108,7 +111,10 @@ def create_from_file_logic(
     # Construct destination path within MEDIA_ROOT/WEIGHTS_DIR
     weights_filename = source_weights_path.name
     # Relative path for the FileField upload_to
-    relative_dest_path = Path(WEIGHTS_DIR.relative_to(STORAGE_DIR)) / f"{meta_name}_v{target_version}_{weights_filename}"
+    relative_dest_path = (
+        Path(WEIGHTS_DIR.relative_to(STORAGE_DIR))
+        / f"{meta_name}_v{target_version}_{weights_filename}"
+    )
     # Full path for shutil.copy
     full_dest_path = STORAGE_DIR / relative_dest_path
@@ -125,8 +131,8 @@ def create_from_file_logic(
     # --- Create/Update ModelMeta Instance ---
     defaults = {
         "labelset": label_set,
-        "weights": relative_dest_path.as_posix(), # Store relative path for FileField
-        **kwargs, # Pass through other fields like activation, mean, std, etc.
+        "weights": relative_dest_path.as_posix(),  # Store relative path for FileField
+        **kwargs,  # Pass through other fields like activation, mean, std, etc.
     }
     # Remove None values from defaults to avoid overriding model defaults unnecessarily
@@ -152,35 +158,39 @@ def create_from_file_logic(
     return model_meta
 # --- Add other logic functions referenced by ModelMeta here ---
 # (get_latest_version_number_logic, get_activation_function_logic, etc.)
 # Placeholder for get_activation_function_logic
 def get_activation_function_logic(activation_name: str):
-    import torch.nn as nn # Import locally as it's specific to this function
+    import torch.nn as nn  # Import locally as it's specific to this function
     if activation_name.lower() == "sigmoid":
         return nn.Sigmoid()
     elif activation_name.lower() == "softmax":
         # Note: Softmax usually requires specifying the dimension
-        return nn.Softmax(dim=1) # Assuming dim=1 (channels) is common
+        return nn.Softmax(dim=1)  # Assuming dim=1 (channels) is common
     elif activation_name.lower() == "none":
         return nn.Identity()
     else:
         # Consider adding more activations or raising an error
         raise ValueError(f"Unsupported activation function: {activation_name}")
 # Placeholder for get_inference_dataset_config_logic
 def get_inference_dataset_config_logic(model_meta: "ModelMeta") -> dict:
     # This would typically extract relevant fields from model_meta
     # for configuring a dataset during inference
     return {
-        "mean": [float(x) for x in model_meta.mean.split(',')],
-        "std": [float(x) for x in model_meta.std.split(',')],
-        "size_y": model_meta.size_y, # Add size_y key
-        "size_x": model_meta.size_x, # Add size_x key
-        "axes": [int(x) for x in model_meta.axes.split(',')],
+        "mean": [float(x) for x in model_meta.mean.split(",")],
+        "std": [float(x) for x in model_meta.std.split(",")],
+        "size_y": model_meta.size_y,  # Add size_y key
+        "size_x": model_meta.size_x,  # Add size_x key
+        "axes": [int(x) for x in model_meta.axes.split(",")],
         # Add other relevant config like normalization type, etc.
     }
 # Placeholder for get_config_dict_logic
 def get_config_dict_logic(model_meta: "ModelMeta") -> dict:
     # Returns a dictionary representation of the model's configuration
@@ -202,6 +212,7 @@ def get_config_dict_logic(model_meta: "ModelMeta") -> dict:
         # Add any other relevant fields
     }
 # Placeholder for get_model_meta_by_name_version_logic
 def get_model_meta_by_name_version_logic(
     cls: Type["ModelMeta"],
@@ -227,17 +238,24 @@ def get_model_meta_by_name_version_logic(
             ) from exc
     else:
         # Get latest version
-        latest = cls.objects.filter(name=meta_name, model=ai_model).order_by("-date_created").first()
+        latest = (
+            cls.objects.filter(name=meta_name, model=ai_model)
+            .order_by("-date_created")
+            .first()
+        )
         if latest:
             return latest
         else:
             raise cls.DoesNotExist(
                 f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
             )
-from huggingface_hub import model_info
 import re
+from huggingface_hub import model_info
 def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
     """
     Infers default model metadata (activation, normalization, input size)
@@ -248,7 +266,9 @@ def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
     """
     if not (info := model_info(model_id)):
-        logger.info(f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults.")
+        logger.info(
+            f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults."
+        )
         return {
             "name": "wg-lux/colo_segmentation_RegNetX800MF_base",
             "activation": "sigmoid",
@@ -295,18 +315,29 @@ def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
         "size_y": size_y,
         "description": f"Inferred defaults for {model_id}",
     }
-def setup_default_from_huggingface_logic(cls, model_id: str, labelset_name: str | None = None):
+def setup_default_from_huggingface_logic(
+    cls, model_id: str, labelset_name: str | None = None
+):
     """
     Downloads model weights from Hugging Face and auto-fills ModelMeta fields.
     """
     meta = infer_default_model_meta_from_hf(model_id)
     # Download weights
-    weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin", local_dir=WEIGHTS_DIR)
+    weights_path = hf_hub_download(
+        repo_id=model_id,
+        filename="colo_segmentation_RegNetX800MF_base.ckpt",
+        local_dir=WEIGHTS_DIR,
+    )
     ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])
-    labelset = LabelSet.objects.first() if not labelset_name else LabelSet.objects.get(name=labelset_name)
+    labelset = (
+        LabelSet.objects.first()
+        if not labelset_name
+        else LabelSet.objects.get(name=labelset_name)
+    )
     return create_from_file_logic(
         cls,

endoreg_db/services/video_import.py CHANGED Viewed

@@ -19,16 +19,12 @@ from pathlib import Path
 from typing import Union, Dict, Any, Optional, List, Tuple
 from django.db import transaction
 from endoreg_db.models import VideoFile, SensitiveMeta
-from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
+from endoreg_db.utils.paths import STORAGE_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
 import random
-from lx_anonymizer.ocr import trocr_full_image_ocr
 from endoreg_db.utils.hashs import get_video_hash
-from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
-from typing import TYPE_CHECKING
+from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
 from django.db.models.fields.files import FieldFile
-if TYPE_CHECKING:
-    from endoreg_db.models import EndoscopyProcessor
+from endoreg_db.models import EndoscopyProcessor
 # File lock configuration (matches PDF import)
 STALE_LOCK_SECONDS = 6000  # 100 minutes - reclaim locks older than this
@@ -58,15 +54,13 @@ class VideoImportService():
             self.project_root = Path(__file__).parent.parent.parent.parent
         # Track processed files to prevent duplicates
-        self.processed_files = set(str(file) for file in os.listdir(ANONYM_VIDEO_DIR))
-        self.STORAGE_DIR = STORAGE_DIR
+        self.processed_files = set(str(Path(ANONYM_VIDEO_DIR) / file) for file in os.listdir(ANONYM_VIDEO_DIR))
         # Central video instance and processing context
         self.current_video: Optional[VideoFile] = None
         self.processing_context: Dict[str, Any] = {}
-        self.delete_source = False
+        self.delete_source = True
         self.logger = logging.getLogger(__name__)
@@ -225,8 +219,12 @@ class VideoImportService():
         # Acquire file lock to prevent concurrent processing
         # Lock will be held until finally block in import_and_anonymize()
-        self.processing_context['_lock_context'] = self._file_lock(file_path)
-        self.processing_context['_lock_context'].__enter__()
+        try:
+            self.processing_context['_lock_context'] = self._file_lock(file_path)
+            self.processing_context['_lock_context'].__enter__()
+        except Exception:
+            self._cleanup_processing_context()
+            raise
         self.logger.info("Acquired file lock for: %s", file_path)
@@ -274,96 +272,78 @@ class VideoImportService():
     def _move_to_final_storage(self):
         """
         Move video from raw_videos to final storage locations.
-        - Raw video → /data/videos (raw_file_path)
+        - Raw video → /data/videos (raw_file_path)
         - Processed video will later → /data/anonym_videos (file_path)
         """
         from endoreg_db.utils import data_paths
-        source_path = self.processing_context['file_path']
-        videos_dir = data_paths["video"]
-        videos_dir.mkdir(parents=True, exist_ok=True)
+        source_path = Path(self.processing_context["file_path"])
+        _current_video = self._require_current_video()
+        videos_dir = Path(data_paths["video"])
+        storage_root = Path(data_paths["storage"])
-        _current_video = self.current_video
-        assert _current_video is not None, "Current video instance is None during storage move"
+        videos_dir.mkdir(parents=True, exist_ok=True)
+        # --- Derive stored_raw_path safely ---
         stored_raw_path = None
-        if hasattr(_current_video, "get_raw_file_path"):
-            possible_path = _current_video.get_raw_file_path()
-            if possible_path:
-                try:
-                    stored_raw_path = Path(possible_path)
-                except (TypeError, ValueError):
-                    stored_raw_path = None
-        if stored_raw_path:
-            try:
-                storage_root = data_paths["storage"]
-                if stored_raw_path.is_absolute():
-                    if not stored_raw_path.is_relative_to(storage_root):
+        try:
+            if hasattr(_current_video, "get_raw_file_path"):
+                candidate = _current_video.get_raw_file_path()
+                if candidate:
+                    candidate_path = Path(candidate)
+                    # Accept only if under storage_root
+                    try:
+                        candidate_path.relative_to(storage_root)
+                        stored_raw_path = candidate_path
+                    except ValueError:
+                        # outside storage_root, reset
                         stored_raw_path = None
-                else:
-                    if stored_raw_path.parts and stored_raw_path.parts[0] == videos_dir.name:
-                        stored_raw_path = storage_root / stored_raw_path
-                    else:
-                        stored_raw_path = videos_dir / stored_raw_path.name
-            except Exception:
-                stored_raw_path = None
-        if stored_raw_path and not stored_raw_path.suffix:
+        except Exception:
             stored_raw_path = None
+        # Fallback: derive from UUID + suffix
         if not stored_raw_path:
+            suffix = source_path.suffix or ".mp4"
             uuid_str = getattr(_current_video, "uuid", None)
-            source_suffix = Path(source_path).suffix or ".mp4"
-            filename = f"{uuid_str}{source_suffix}" if uuid_str else Path(source_path).name
+            filename = f"{uuid_str}{suffix}" if uuid_str else source_path.name
             stored_raw_path = videos_dir / filename
-        delete_source = bool(self.processing_context.get('delete_source'))
+        delete_source = bool(self.processing_context.get("delete_source", True))
         stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
-        if not stored_raw_path.exists():
-            try:
-                if source_path.exists():
-                    if delete_source:
-                        shutil.move(str(source_path), str(stored_raw_path))
-                        self.logger.info("Moved raw video to: %s", stored_raw_path)
-                    else:
-                        shutil.copy2(str(source_path), str(stored_raw_path))
-                        self.logger.info("Copied raw video to: %s", stored_raw_path)
-                else:
-                    raise FileNotFoundError(f"Neither stored raw path nor source path exists for {self.processing_context['file_path']}")
-            except Exception as e:
-                self.logger.error("Failed to place video in final storage: %s", e)
-                raise
-        else:
-            # If we already have the stored copy, respect delete_source flag without touching assets unnecessarily
-            if delete_source and source_path.exists():
+        # --- Move or copy raw video ---
+        try:
+            if delete_source:
+                # Try atomic move first, fallback to copy+unlink
                 try:
+                    os.replace(source_path, stored_raw_path)
+                    self.logger.info("Moved raw video to: %s", stored_raw_path)
+                except Exception:
+                    shutil.copy2(source_path, stored_raw_path)
                     os.remove(source_path)
-                    self.logger.info("Removed original source file after storing copy: %s", source_path)
-                except OSError as e:
-                    self.logger.warning("Failed to remove source file %s: %s", source_path, e)
+                    self.logger.info("Copied & removed raw video to: %s", stored_raw_path)
+            else:
+                shutil.copy2(source_path, stored_raw_path)
+                self.logger.info("Copied raw video to: %s", stored_raw_path)
+        except Exception as e:
+            self.logger.error("Failed to move/copy video to final storage: %s", e)
+            raise
-        # Ensure database path points to stored location (relative to storage root)
+        # --- Ensure DB raw_file is relative to storage root ---
         try:
-            storage_root = data_paths["storage"]
-            relative_path = Path(stored_raw_path).relative_to(storage_root)
-            if _current_video.raw_file.name != str(relative_path):
-                _current_video.raw_file.name = str(relative_path)
-                _current_video.save(update_fields=['raw_file'])
-                self.logger.info("Updated raw_file path to: %s", relative_path)
-        except Exception as e:
-            self.logger.error("Failed to ensure raw_file path is relative: %s", e)
-            fallback_relative = Path("videos") / Path(stored_raw_path).name
-            if _current_video.raw_file.name != fallback_relative.as_posix():
-                _current_video.raw_file.name = fallback_relative.as_posix()
-                _current_video.save(update_fields=['raw_file'])
-                self.logger.info("Updated raw_file path using fallback: %s", fallback_relative.as_posix())
+            rel_path = stored_raw_path.relative_to(storage_root)
+        except Exception:
+            rel_path = Path("videos") / stored_raw_path.name
+        if _current_video.raw_file.name != rel_path.as_posix():
+            _current_video.raw_file.name = rel_path.as_posix()
+            _current_video.save(update_fields=["raw_file"])
+            self.logger.info("Updated raw_file path to: %s", rel_path.as_posix())
+        # --- Store for later stages ---
+        self.processing_context["raw_video_path"] = stored_raw_path
+        self.processing_context["video_filename"] = stored_raw_path.name
-        # Store paths for later processing
-        self.processing_context['raw_video_path'] = Path(stored_raw_path)
-        self.processing_context['video_filename'] = Path(stored_raw_path).name
     def _setup_processing_environment(self):
         """Setup the processing environment without file movement."""
@@ -405,7 +385,7 @@ class VideoImportService():
     def _process_frames_and_metadata(self):
         """Process frames and extract metadata with anonymization."""
         # Check frame cleaning availability
-        frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
+        frame_cleaning_available, frame_cleaner  = self._ensure_frame_cleaning_available()
         video = self._require_current_video()
         raw_file_field = video.raw_file
@@ -426,7 +406,7 @@ class VideoImportService():
             from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
             with ThreadPoolExecutor(max_workers=1) as executor:
-                future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
+                future = executor.submit(self._perform_frame_cleaning, endoscope_data_roi_nested, endoscope_image_roi)
                 try:
                     # Increased timeout to better accommodate ffmpeg + OCR
                     future.result(timeout=300)
@@ -472,6 +452,9 @@ class VideoImportService():
                 self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
     def _save_anonymized_video(self):
+        original_raw_file_path_to_delete = None
+        original_raw_frame_dir_to_delete = None
         video = self._require_current_video()
         anonymized_video_path = video.get_target_anonymized_video_path()
@@ -759,6 +742,17 @@ class VideoImportService():
         except Exception as exc:
             self.logger.error("Failed to retrieve processor ROI information: %s", exc)
+        # Convert dict to nested list if necessary to match return type
+        if isinstance(endoscope_data_roi_nested, dict):
+            # Convert dict[str, dict[str, int | None] | None] to List[List[Dict[str, Any]]]
+            converted_roi = []
+            for key, value in endoscope_data_roi_nested.items():
+                if isinstance(value, dict):
+                    converted_roi.append([value])
+                elif value is None:
+                    converted_roi.append([])
+            endoscope_data_roi_nested = converted_roi
         return endoscope_data_roi_nested, endoscope_image_roi
     def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
@@ -780,8 +774,6 @@ class VideoImportService():
                 sensitive_meta = SensitiveMeta.create_from_dict(default_data)
                 video.sensitive_meta = sensitive_meta
                 video.save(update_fields=["sensitive_meta"])
-                state = video.get_or_create_state()
-                state.mark_sensitive_meta_processed(save=True)
                 self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
             except Exception as exc:
                 self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
@@ -820,67 +812,43 @@ class VideoImportService():
             Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
         """
         try:
-            # Check if we can find the lx-anonymizer directory
-            from importlib import resources
-            lx_anonymizer_path = resources.files("lx_anonymizer")
+            # Check if we can find lx-anonymizer
+            from lx_anonymizer import FrameCleaner  # type: ignore[import]
-            # make sure lx_anonymizer_path is a Path object
-            lx_anonymizer_path = Path(str(lx_anonymizer_path))
-            if lx_anonymizer_path.exists():
-                # Add to Python path temporarily
-                if str(lx_anonymizer_path) not in sys.path:
-                    sys.path.insert(0, str(lx_anonymizer_path))
-                # Try simple import
-                from lx_anonymizer import FrameCleaner, ReportReader
-                self.logger.info("Successfully imported lx_anonymizer modules")
-                # Remove from path to avoid conflicts
-                if str(lx_anonymizer_path) in sys.path:
-                    sys.path.remove(str(lx_anonymizer_path))
-                return True, FrameCleaner, ReportReader
-            else:
-                self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
+            if FrameCleaner:
+                return True, FrameCleaner
         except Exception as e:
-            self.logger.warning(f"Frame cleaning not available: {e}")
+            self.logger.warning(f"Frame cleaning not available: {e} Please install or update lx_anonymizer.")
-        return False, None, None
+        return False, None
-    def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
+    def _perform_frame_cleaning(self, endoscope_data_roi_nested, endoscope_image_roi):
         """Perform frame cleaning and anonymization."""
         # Instantiate frame cleaner
-        frame_cleaner = FrameCleaner()
+        is_available, frame_cleaner = self._ensure_frame_cleaning_available()
+        if not is_available:
+            raise RuntimeError("Frame cleaning not available")
         # Prepare parameters for frame cleaning
         raw_video_path = self.processing_context.get('raw_video_path')
         if not raw_video_path or not Path(raw_video_path).exists():
             raise RuntimeError(f"Raw video path not found: {raw_video_path}")
-        # Get processor name safely
-        video = self._require_current_video()
-        video_meta = getattr(video, "video_meta", None)
-        processor = getattr(video_meta, "processor", None) if video_meta else None
-        device_name = processor.name if processor else self.processing_context['processor_name']
         # Create temporary output path for cleaned video
         video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
         cleaned_filename = f"cleaned_{video_filename}"
         cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
-        # Processor roi is used later to OCR preknown regions.
         # Clean video with ROI masking (heavy I/O operation)
         actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
             video_path=Path(raw_video_path),
-            video_file_obj=video,
             endoscope_image_roi=endoscope_image_roi,
             endoscope_data_roi_nested=endoscope_data_roi_nested,
             output_path=cleaned_video_path,
@@ -1023,7 +991,7 @@ def import_and_anonymize(
     center_name: str,
     processor_name: str,
     save_video: bool = True,
-    delete_source: bool = False,
+    delete_source: bool = True,
 ) -> VideoFile | None:
     """Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
     Kept for backward compatibility with callers that import this function directly.

endoreg_db/tasks/video_processing_tasks.py CHANGED Viewed

@@ -53,7 +53,7 @@ def apply_video_mask_task(self, video_id: int, mask_type: str = 'device_default'
         self.update_state(state='PROGRESS', meta={'progress': 10, 'message': 'Setting up FrameCleaner...'})
         # Initialize FrameCleaner
-        cleaner = FrameCleaner(use_minicpm=True)
+        cleaner = FrameCleaner()
         # Determine mask configuration
         if mask_type == 'custom' and custom_mask:
@@ -110,14 +110,14 @@ def _setup_frame_removal(video_id: int, detection_engine: str):
     from lx_anonymizer.frame_cleaner import FrameCleaner
     from django.shortcuts import get_object_or_404
     video = get_object_or_404(VideoFile, pk=video_id)
-    video_path = Path(video.file.path)
+    video_path = Path(video.raw_file.path)
     if not video_path.exists():
         raise FileNotFoundError(f"Video file not found: {video_path}")
     output_dir = video_path.parent / "processed"
     output_dir.mkdir(exist_ok=True)
     output_path = output_dir / f"{video_path.stem}_cleaned{video_path.suffix}"
     use_minicpm = detection_engine == 'minicpm'
-    cleaner = FrameCleaner(use_minicpm=use_minicpm)
+    cleaner = FrameCleaner()
     return video, video_path, output_path, cleaner
 def _detect_sensitive_frames(self, cleaner, video_path, selection_method, manual_frames, total_frames):
@@ -257,7 +257,7 @@ def reprocess_video_task(self, video_id: int):
         self.update_state(state='PROGRESS', meta={'progress': 20, 'message': 'Initializing FrameCleaner...'})
         # Initialize FrameCleaner with optimal settings
-        cleaner = FrameCleaner(use_minicpm=True)
+        cleaner = FrameCleaner()
         # Create output path
         output_dir = video_path.parent / "processed"

{endoreg_db-0.8.3.1.dist-info → endoreg_db-0.8.3.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: endoreg-db
-Version: 0.8.3.1
+Version: 0.8.3.3
 Summary: EndoReg Db Django App
 Project-URL: Homepage, https://info.coloreg.de
 Project-URL: Repository, https://github.com/wg-lux/endoreg-db
@@ -33,7 +33,7 @@ Requires-Dist: huggingface-hub>=0.35.3
 Requires-Dist: icecream>=2.1.4
 Requires-Dist: librosa==0.11.0
 Requires-Dist: llvmlite>=0.44.0
-Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.7
+Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.8
 Requires-Dist: moviepy==2.2.1
 Requires-Dist: mypy>=1.16.0
 Requires-Dist: numpy>=2.2.3

{endoreg_db-0.8.3.1.dist-info → endoreg_db-0.8.3.3.dist-info}/RECORD RENAMED Viewed

@@ -248,6 +248,7 @@ endoreg_db/management/__init__.py,sha256=3dsK9Mizq1veuWTcvSOyWMFT9VI8wtyk-P2K9Ri
 endoreg_db/management/commands/__init__.py,sha256=Ch0jwQfNpOSr4O5KKMfYJ93dsesk1Afb-JtbRVyFXZs,21
 endoreg_db/management/commands/anonymize_video.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 endoreg_db/management/commands/check_auth.py,sha256=TPiYeCZ5QcqIvR33xhbqXunO2nrcNAmHb_izoMTqgpg,5390
+endoreg_db/management/commands/create_model_meta_from_huggingface.py,sha256=RUuoBjTzdchuMY6qcwBENN7FTyTygPTZQBZYWwhugDc,3925
 endoreg_db/management/commands/create_multilabel_model_meta.py,sha256=qeoyqcF2CWcnhniVRrlYbmJmwNwyZb-VQ0pjkr6arJU,7566
 endoreg_db/management/commands/fix_missing_patient_data.py,sha256=5TPUTOQwI2fVh3Zd88o4ne0R8N_V98k0GZsI1gW0kGM,7766
 endoreg_db/management/commands/fix_video_paths.py,sha256=7LLwc38oX3B_tYWbLJA43Li_KBO3m5Lyw0CF6YqN5rU,7145
@@ -289,6 +290,7 @@ endoreg_db/management/commands/load_unit_data.py,sha256=tcux-iL-ByT2ApgmHEkLllZS
 endoreg_db/management/commands/load_user_groups.py,sha256=D7SK2FvZEHoE4TIXNGCjDw5_12MH9bpGZvoS7eEv0Os,1031
 endoreg_db/management/commands/register_ai_model.py,sha256=KixTfuQR6TUfRmzB5GOos16BFOz7NL4TzLzBkgtPPgE,2510
 endoreg_db/management/commands/reset_celery_schedule.py,sha256=U-m_FNRTw6LAwJoT9RUE4qrhmQXm7AyFToPcHYyJpIE,386
+endoreg_db/management/commands/setup_endoreg_db.py,sha256=_mJkNB2IZNcgDQkOExUTkmmjp9qMwEiZH2KEJcyCi_Y,8635
 endoreg_db/management/commands/start_filewatcher.py,sha256=3jESBqRiYPa9f35--zd70qQaYnyT0tzRO_b_HJuyteQ,4093
 endoreg_db/management/commands/storage_management.py,sha256=NpToX59ndwTFNmnSoeppmiPdMvpjSHH7mAdIe4SvUoI,22396
 endoreg_db/management/commands/summarize_db_content.py,sha256=pOIz3qbY4Ktmh0zV_DKFx971VD0pPx027gCD7a47EL0,10766
@@ -462,7 +464,7 @@ endoreg_db/models/medical/risk/risk_type.py,sha256=kEugcaWSTEWH_Vxq4dcF80Iv1L4_K
 endoreg_db/models/metadata/__init__.py,sha256=8I6oLj3YTmeaPGJpL0AWG5gLwp38QzrEggxSkTisv7c,474
 endoreg_db/models/metadata/frame_ocr_result.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 endoreg_db/models/metadata/model_meta.py,sha256=F_r-PTLeNi4J-4EaGCQkGIguhdl7Bwba7_i56ZAjc-4,7589
-endoreg_db/models/metadata/model_meta_logic.py,sha256=27mqScxUTJXNUVc6CqAs5dXjspEsh0TWPmlxdJVulGc,12015
+endoreg_db/models/metadata/model_meta_logic.py,sha256=6w1YX8hVq40UXbVN1fvDO9OljwekBZaDVHEjVZecoV8,12252
 endoreg_db/models/metadata/pdf_meta.py,sha256=BTmpSgqxmPKi0apcNjyrZAS4AFKCPXVdBd6VBeyyv6E,3174
 endoreg_db/models/metadata/sensitive_meta.py,sha256=ekLHrW-b5uYcjfkRd0EW5ncx5ef8Bu-K6msDkpWCAbk,13034
 endoreg_db/models/metadata/sensitive_meta_logic.py,sha256=Oh7ssZQEPfKGfRMF5nXKJpOIxXx-Xibd3rpOu-bQilk,29988
@@ -600,10 +602,10 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
 endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
 endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
 endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
-endoreg_db/services/video_import.py,sha256=Ifl-x1WSlHEcA-Lzf75l_b84g8LqXXUA_OmENZhjv3A,47747
+endoreg_db/services/video_import.py,sha256=gDuVTW5WUYGSc0m5ly67cc10YpnTpBkxO7uOEcRa3Ok,45663
 endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
 endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
-endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
+endoreg_db/tasks/video_processing_tasks.py,sha256=rZ7Kr49bAR4Q-vALO2SURebrhcJ5hSFGwjF4aULrOao,14089
 endoreg_db/templates/timeline.html,sha256=H9VXKOecCzqcWWkpNIZXFI29ztg-oxV5uvxMglgoClk,6167
 endoreg_db/templates/admin/patient_finding_intervention.html,sha256=F3JUKm3HhWIf_xoZZ-SET5d5ZDlm2jMM8g909w1dnYc,10164
 endoreg_db/templates/admin/start_examination.html,sha256=3K4wirul9KNyB5mN9cpfCSCAyAD6ro19GwxFOY5sZ3A,267
@@ -784,7 +786,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
 endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
 endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
 endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
-endoreg_db-0.8.3.1.dist-info/METADATA,sha256=M6P6tLtoK5aa7AEUO9ZjJxAIe96STW10oY4grrihNYU,14758
-endoreg_db-0.8.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-endoreg_db-0.8.3.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-endoreg_db-0.8.3.1.dist-info/RECORD,,
+endoreg_db-0.8.3.3.dist-info/METADATA,sha256=anKqQ1fidx7S7ca0cWHU1UHEDNI67ujUV-RO4IGgr1g,14758
+endoreg_db-0.8.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+endoreg_db-0.8.3.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+endoreg_db-0.8.3.3.dist-info/RECORD,,

{endoreg_db-0.8.3.1.dist-info → endoreg_db-0.8.3.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{endoreg_db-0.8.3.1.dist-info → endoreg_db-0.8.3.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

endoreg-db 0.8.3.1__py3-none-any.whl → 0.8.3.3__py3-none-any.whl

Potentially problematic release.

endoreg-db 0.8.3.1__py3-none-any.whl → 0.8.3.3__py3-none-any.whl