PyPI - endoreg-db - Versions diffs - 0.8.2.8__py3-none-any.whl → 0.8.3.0__py3-none-any.whl - Mend

endoreg-db 0.8.2.8__py3-none-any.whl → 0.8.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of endoreg-db might be problematic. Click here for more details.

Files changed (8)

endoreg_db/models/media/video/pipe_1.py CHANGED Viewed

@@ -74,11 +74,20 @@ def _pipe_1(
                 except AiModel.DoesNotExist:
                     logger.error(f"Pipe 1 failed: Model '{model_name}' not found.")
                     return False
             except ModelMeta.DoesNotExist:
-                logger.error(
-                    f"Pipe 1 failed: ModelMeta version {model_meta_version} for model '{model_name}' not found."
-                )
-                return False
+                try:
+                    model_name = download_segmentation_model()
+                    ai_model_obj = AiModel.objects.get(name=model_name)
+                    if model_meta_version is not None:
+                        model_meta = ai_model_obj.metadata_versions.get(version=model_meta_version)
+                    else:
+                        model_meta = ModelMeta.setup_default_from_huggingface()
+                except ModelMeta.DoesNotExist:
+                    logger.error(
+                        f"Pipe 1 failed: ModelMeta version {model_meta_version} for model '{model_name}' not found."
+                    )
+                    return False
             try:
                 sequences: Optional[Dict[str, List[Tuple[int, int]]]] = video_file.predict_video(
                     model_meta=model_meta,

endoreg_db/models/metadata/model_meta.py CHANGED Viewed

@@ -18,7 +18,6 @@ from . import model_meta_logic as logic
 if TYPE_CHECKING:
     from endoreg_db.models import LabelSet, AiModel  # pylint: disable=import-outside-toplevel
-    from torch.nn import Module as TorchModule
 class ModelMetaManager(models.Manager):
@@ -128,6 +127,20 @@ class ModelMeta(models.Model):
             cls, meta_name, model_name, labelset_name, weights_file,
             requested_version, bump_if_exists, **kwargs
         )
+    @classmethod
+    def setup_default_from_huggingface(
+        cls: Type["ModelMeta"],
+        model_id: str = "wg-lux/colo_segmentation_RegNetX800MF_base",
+        labelset_name: Optional[str] = None,
+    ) -> "ModelMeta":
+        """
+        Downloads a pretrained model from Hugging Face and initializes ModelMeta automatically.
+        """
+        # If labelset_name is not provided, handle default logic here if needed
+        return logic.setup_default_from_huggingface_logic(cls, model_id, labelset_name)
     @classmethod
     def get_latest_version_number(cls: Type["ModelMeta"], meta_name: str, model_name: str) -> int:

endoreg_db/models/metadata/model_meta_logic.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import shutil
 from pathlib import Path
 from typing import Optional, TYPE_CHECKING, Any, Type
+from huggingface_hub import hf_hub_download
 from django.db import transaction
 # Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
@@ -234,3 +234,90 @@ def get_model_meta_by_name_version_logic(
             raise cls.DoesNotExist(
                 f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
             )
+from huggingface_hub import model_info
+import re
+def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
+    """
+    Infers default model metadata (activation, normalization, input size)
+    from a Hugging Face model_id using its tags and architecture.
+    Returns:
+        A dict with fields: name, activation, mean, std, size_x, size_y
+    """
+    if not (info := model_info(model_id)):
+        logger.info(f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults.")
+        return {
+            "name": "wg-lux/colo_segmentation_RegNetX800MF_base",
+            "activation": "sigmoid",
+            "mean": (0.45211223, 0.27139644, 0.19264949),
+            "std": (0.31418097, 0.21088019, 0.16059452),
+            "size_x": 716,
+            "size_y": 716,
+            "description": f"Defaults for unknown model {model_id}",
+        }
+    # Extract architecture from tags or model_id ---
+    tags = info.tags or []
+    model_name = model_id.split("/")[-1].lower()
+    # Heuristics for architecture and task
+    architecture = next((t for t in tags if t.startswith("architecture:")), None)
+    task = next((t for t in tags if t.startswith("task:")), None)
+    # Default values
+    activation = "sigmoid"
+    size_x = size_y = 716
+    mean = (0.45211223, 0.27139644, 0.19264949)
+    std = (0.31418097, 0.21088019, 0.16059452)
+    # --- 2. Task-based inference ---
+    if task:
+        if "segmentation" in task or "detection" in task:
+            activation = "sigmoid"
+        elif any(k in task for k in ["classification"]):
+            activation = "softmax"
+    # --- 3. Architecture-based inference ---
+    if architecture:
+        arch = architecture.replace("architecture:", "")
+    else:
+        arch = re.sub(r"[^a-z0-9]+", "_", model_name)
+    return {
+        "name": arch,
+        "activation": activation,
+        "mean": mean,
+        "std": std,
+        "size_x": size_x,
+        "size_y": size_y,
+        "description": f"Inferred defaults for {model_id}",
+    }
+def setup_default_from_huggingface_logic(cls, model_id: str, labelset_name: str | None = None):
+    """
+    Downloads model weights from Hugging Face and auto-fills ModelMeta fields.
+    """
+    meta = infer_default_model_meta_from_hf(model_id)
+    # Download weights
+    weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin", local_dir=WEIGHTS_DIR)
+    ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])
+    labelset = LabelSet.objects.first() if not labelset_name else LabelSet.objects.get(name=labelset_name)
+    return create_from_file_logic(
+        cls,
+        meta_name=meta["name"],
+        model_name=ai_model.name,
+        labelset_name=labelset.name,
+        weights_file=weights_path,
+        activation=meta["activation"],
+        mean=meta["mean"],
+        std=meta["std"],
+        size_x=meta["size_x"],
+        size_y=meta["size_y"],
+        description=meta["description"],
+    )

endoreg_db/services/video_import.py CHANGED Viewed

@@ -8,26 +8,24 @@ Changelog:
     October 14, 2025: Added file locking mechanism to prevent race conditions
                       during concurrent video imports (matches PDF import pattern)
 """
+from datetime import date
 import logging
+import sys
 import os
-import random
 import shutil
-import sys
 import time
 from contextlib import contextmanager
-from datetime import date
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from typing import Union, Dict, Any, Optional, List, Tuple
 from django.db import transaction
-from django.db.models.fields.files import FieldFile
+from endoreg_db.models import VideoFile, SensitiveMeta
+from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
+import random
 from lx_anonymizer.ocr import trocr_full_image_ocr
-from endoreg_db.models import SensitiveMeta, VideoFile
-from endoreg_db.models.media.video.video_file_anonymize import _anonymize, _cleanup_raw_assets
 from endoreg_db.utils.hashs import get_video_hash
-from endoreg_db.utils.paths import ANONYM_VIDEO_DIR, RAW_FRAME_DIR, STORAGE_DIR, VIDEO_DIR
+from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
+from typing import TYPE_CHECKING
+from django.db.models.fields.files import FieldFile
 if TYPE_CHECKING:
     from endoreg_db.models import EndoscopyProcessor
@@ -39,36 +37,37 @@ MAX_LOCK_WAIT_SECONDS = 90  # New: wait up to 90s for a non-stale lock to clear
 logger = logging.getLogger(__name__)
-class VideoImportService:
+class VideoImportService():
     """
     Service for importing and anonymizing video files.
     Uses a central video instance pattern for cleaner state management.
     Features (October 14, 2025):
         - File locking to prevent concurrent processing of the same video
         - Stale lock detection and reclamation (600s timeout)
         - Hash-based duplicate detection
         - Graceful fallback processing without lx_anonymizer
     """
     def __init__(self, project_root: Optional[Path] = None):
         # Set up project root path
         if project_root:
             self.project_root = Path(project_root)
         else:
             self.project_root = Path(__file__).parent.parent.parent.parent
         # Track processed files to prevent duplicates
         self.processed_files = set(str(file) for file in os.listdir(ANONYM_VIDEO_DIR))
         self.STORAGE_DIR = STORAGE_DIR
         # Central video instance and processing context
         self.current_video: Optional[VideoFile] = None
         self.processing_context: Dict[str, Any] = {}
         self.delete_source = False
         self.logger = logging.getLogger(__name__)
     def _require_current_video(self) -> VideoFile:
@@ -76,12 +75,12 @@ class VideoImportService:
         if self.current_video is None:
             raise RuntimeError("Current video instance is not set")
         return self.current_video
     @contextmanager
     def _file_lock(self, path: Path):
         """
         Create a file lock to prevent duplicate processing of the same video.
         This context manager creates a .lock file alongside the video file.
         If the lock file already exists, it checks if it's stale (older than
         STALE_LOCK_SECONDS) and reclaims it if necessary. If it's not stale,
@@ -105,21 +104,24 @@ class VideoImportService:
                     except FileNotFoundError:
                         # Race: lock removed between exists and stat; retry acquire in next loop
                         age = None
                     if age is not None and age > STALE_LOCK_SECONDS:
                         try:
-                            logger.warning("Stale lock detected for %s (age %.0fs). Reclaiming lock...", path, age)
+                            logger.warning(
+                                "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
+                                path, age
+                            )
                             lock_path.unlink()
                         except Exception as e:
                             logger.warning("Failed to remove stale lock %s: %s", lock_path, e)
                         # Loop continues and retries acquire immediately
                         continue
                     # Not stale: wait until deadline, then give up gracefully
                     if time.time() >= deadline:
                         raise ValueError(f"File already being processed: {path}")
                     time.sleep(1.0)
             os.write(fd, b"lock")
             os.close(fd)
             fd = None
@@ -132,11 +134,11 @@ class VideoImportService:
                     lock_path.unlink()
             except OSError:
                 pass
     def processed(self) -> bool:
         """Indicates if the current file has already been processed."""
-        return getattr(self, "_processed", False)
+        return getattr(self, '_processed', False)
     def import_and_anonymize(
         self,
         file_path: Union[Path, str],
@@ -151,8 +153,9 @@ class VideoImportService:
         """
         try:
             # Initialize processing context
-            self._initialize_processing_context(file_path, center_name, processor_name, save_video, delete_source)
+            self._initialize_processing_context(file_path, center_name, processor_name,
+                                               save_video, delete_source)
             # Validate and prepare file (may raise ValueError if another worker holds a non-stale lock)
             try:
                 self._validate_and_prepare_file()
@@ -162,24 +165,27 @@ class VideoImportService:
                     self.logger.info(f"Skipping {file_path}: {ve}")
                     return None
                 raise
-            # Create or retrieve video instance FIRST
+            # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
+            self._create_sensitive_file()
+            # Create or retrieve video instance
             self._create_or_retrieve_video_instance()
             # Setup processing environment
             self._setup_processing_environment()
             # Process frames and metadata
             self._process_frames_and_metadata()
             # Finalize processing
             self._finalize_processing()
             # Move files and cleanup
             self._cleanup_and_archive()
             return self.current_video
         except Exception as e:
             self.logger.error(f"Video import and anonymization failed for {file_path}: {e}")
             self._cleanup_on_error()
@@ -187,93 +193,94 @@ class VideoImportService:
         finally:
             self._cleanup_processing_context()
-    def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str, processor_name: str, save_video: bool, delete_source: bool):
+    def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str,
+                                     processor_name: str, save_video: bool, delete_source: bool):
         """Initialize the processing context for the current video import."""
         self.processing_context = {
-            "file_path": Path(file_path),
-            "center_name": center_name,
-            "processor_name": processor_name,
-            "save_video": save_video,
-            "delete_source": delete_source,
-            "processing_started": False,
-            "frames_extracted": False,
-            "anonymization_completed": False,
-            "error_reason": None,
+            'file_path': Path(file_path),
+            'center_name': center_name,
+            'processor_name': processor_name,
+            'save_video': save_video,
+            'delete_source': delete_source,
+            'processing_started': False,
+            'frames_extracted': False,
+            'anonymization_completed': False,
+            'error_reason': None
         }
         self.logger.info(f"Initialized processing context for: {file_path}")
     def _validate_and_prepare_file(self):
         """
         Validate the video file and prepare for processing.
         Uses file locking to prevent concurrent processing of the same video file.
         This prevents race conditions where multiple workers might try to process
         the same video simultaneously.
         The lock is acquired here and held for the entire import process.
         See _file_lock() for lock reclamation logic.
         """
-        file_path = self.processing_context["file_path"]
+        file_path = self.processing_context['file_path']
         # Acquire file lock to prevent concurrent processing
         # Lock will be held until finally block in import_and_anonymize()
-        self.processing_context["_lock_context"] = self._file_lock(file_path)
-        self.processing_context["_lock_context"].__enter__()
+        self.processing_context['_lock_context'] = self._file_lock(file_path)
+        self.processing_context['_lock_context'].__enter__()
         self.logger.info("Acquired file lock for: %s", file_path)
         # Check if already processed (memory-based check)
         if str(file_path) in self.processed_files:
             self.logger.info("File %s already processed, skipping", file_path)
             self._processed = True
             raise ValueError(f"File already processed: {file_path}")
         # Check file exists
         if not file_path.exists():
             raise FileNotFoundError(f"Video file not found: {file_path}")
         self.logger.info("File validation completed for: %s", file_path)
     def _create_or_retrieve_video_instance(self):
         """Create or retrieve the VideoFile instance and move to final storage."""
         # Removed duplicate import of VideoFile (already imported at module level)
         self.logger.info("Creating VideoFile instance...")
         self.current_video = VideoFile.create_from_file_initialized(
-            file_path=self.processing_context["file_path"],
-            center_name=self.processing_context["center_name"],
-            processor_name=self.processing_context["processor_name"],
-            delete_source=self.processing_context["delete_source"],
-            save_video_file=self.processing_context["save_video"],
+            file_path=self.processing_context['file_path'],
+            center_name=self.processing_context['center_name'],
+            processor_name=self.processing_context['processor_name'],
+            delete_source=self.processing_context['delete_source'],
+            save_video_file=self.processing_context['save_video'],
         )
         if not self.current_video:
             raise RuntimeError("Failed to create VideoFile instance")
         # Immediately move to final storage locations
         self._move_to_final_storage()
         self.logger.info("Created VideoFile with UUID: %s", self.current_video.uuid)
         # Get and mark processing state
         state = VideoFile.get_or_create_state(self.current_video)
         if not state:
             raise RuntimeError("Failed to create VideoFile state")
         state.mark_processing_started(save=True)
-        self.processing_context["processing_started"] = True
+        self.processing_context['processing_started'] = True
     def _move_to_final_storage(self):
         """
         Move video from raw_videos to final storage locations.
-        - Raw video → /data/videos (raw_file_path)
+        - Raw video → /data/videos (raw_file_path)
         - Processed video will later → /data/anonym_videos (file_path)
         """
         from endoreg_db.utils import data_paths
-        source_path = self.processing_context["file_path"]
+        source_path = self.processing_context['file_path']
         videos_dir = data_paths["video"]
         videos_dir.mkdir(parents=True, exist_ok=True)
@@ -313,7 +320,7 @@ class VideoImportService:
             filename = f"{uuid_str}{source_suffix}" if uuid_str else Path(source_path).name
             stored_raw_path = videos_dir / filename
-        delete_source = bool(self.processing_context.get("delete_source"))
+        delete_source = bool(self.processing_context.get('delete_source'))
         stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
         if not stored_raw_path.exists():
@@ -345,19 +352,19 @@ class VideoImportService:
             relative_path = Path(stored_raw_path).relative_to(storage_root)
             if _current_video.raw_file.name != str(relative_path):
                 _current_video.raw_file.name = str(relative_path)
-                _current_video.save(update_fields=["raw_file"])
+                _current_video.save(update_fields=['raw_file'])
                 self.logger.info("Updated raw_file path to: %s", relative_path)
         except Exception as e:
             self.logger.error("Failed to ensure raw_file path is relative: %s", e)
             fallback_relative = Path("videos") / Path(stored_raw_path).name
             if _current_video.raw_file.name != fallback_relative.as_posix():
                 _current_video.raw_file.name = fallback_relative.as_posix()
-                _current_video.save(update_fields=["raw_file"])
+                _current_video.save(update_fields=['raw_file'])
                 self.logger.info("Updated raw_file path using fallback: %s", fallback_relative.as_posix())
         # Store paths for later processing
-        self.processing_context["raw_video_path"] = Path(stored_raw_path)
-        self.processing_context["video_filename"] = Path(stored_raw_path).name
+        self.processing_context['raw_video_path'] = Path(stored_raw_path)
+        self.processing_context['video_filename'] = Path(stored_raw_path).name
     def _setup_processing_environment(self):
         """Setup the processing environment without file movement."""
@@ -368,32 +375,32 @@ class VideoImportService:
         # Initialize frame objects in database
         video.initialize_frames()
         # Extract frames BEFORE processing to prevent pipeline 1 conflicts
         self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
         try:
             frames_extracted = video.extract_frames(overwrite=False)
             if frames_extracted:
-                self.processing_context["frames_extracted"] = True
+                self.processing_context['frames_extracted'] = True
                 self.logger.info("Frame extraction completed successfully")
                 # CRITICAL: Immediately save the frames_extracted state to database
                 # to prevent refresh_from_db() in pipeline 1 from overriding it
                 state = video.get_or_create_state()
                 if not state.frames_extracted:
                     state.frames_extracted = True
-                    state.save(update_fields=["frames_extracted"])
+                    state.save(update_fields=['frames_extracted'])
                     self.logger.info("Persisted frames_extracted=True to database")
             else:
                 self.logger.warning("Frame extraction failed, but continuing...")
-                self.processing_context["frames_extracted"] = False
+                self.processing_context['frames_extracted'] = False
         except Exception as e:
             self.logger.warning(f"Frame extraction failed during setup: {e}, but continuing...")
-            self.processing_context["frames_extracted"] = False
+            self.processing_context['frames_extracted'] = False
         # Ensure default patient data
         self._ensure_default_patient_data(video_instance=video)
         self.logger.info("Processing environment setup completed")
     def _process_frames_and_metadata(self):
@@ -412,24 +419,25 @@ class VideoImportService:
         try:
             self.logger.info("Starting frame-level anonymization with processor ROI masking...")
             # Get processor ROI information
             endoscope_data_roi_nested, endoscope_image_roi = self._get_processor_roi_info()
             # Perform frame cleaning with timeout to prevent blocking
-            from concurrent.futures import ThreadPoolExecutor
-            from concurrent.futures import TimeoutError as FutureTimeoutError
+            from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
             with ThreadPoolExecutor(max_workers=1) as executor:
                 future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
                 try:
                     # Increased timeout to better accommodate ffmpeg + OCR
                     future.result(timeout=300)
-                    self.processing_context["anonymization_completed"] = True
+                    self.processing_context['anonymization_completed'] = True
                     self.logger.info("Frame cleaning completed successfully within timeout")
                 except FutureTimeoutError:
                     self.logger.warning("Frame cleaning timed out; entering grace period check for cleaned output")
                     # Grace period: detect if cleaned file appears shortly after timeout
+                    raw_video_path = self.processing_context.get('raw_video_path')
+                    video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
                     grace_seconds = 60
                     expected_cleaned_path: Optional[Path] = None
                     processed_field = video.processed_file
@@ -442,8 +450,8 @@ class VideoImportService:
                     if expected_cleaned_path is not None:
                         for _ in range(grace_seconds):
                             if expected_cleaned_path.exists():
-                                self.processing_context["cleaned_video_path"] = expected_cleaned_path
-                                self.processing_context["anonymization_completed"] = True
+                                self.processing_context['cleaned_video_path'] = expected_cleaned_path
+                                self.processing_context['anonymization_completed'] = True
                                 self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
                                 found = True
                                 break
@@ -461,8 +469,8 @@ class VideoImportService:
             except Exception as fallback_error:
                 self.logger.error("Fallback anonymization also failed: %s", fallback_error)
                 # If even fallback fails, mark as not anonymized but continue import
-                self.processing_context["anonymization_completed"] = False
-                self.processing_context["error_reason"] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
+                self.processing_context['anonymization_completed'] = False
+                self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
     def _save_anonymized_video(self):
         video = self._require_current_video()
@@ -473,7 +481,9 @@ class VideoImportService:
         new_processed_hash = get_video_hash(anonymized_video_path)
         if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
-            raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid}).")
+            raise ValueError(
+                f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
+            )
         video.processed_video_hash = new_processed_hash
         video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
@@ -492,11 +502,11 @@ class VideoImportService:
             update_fields.extend(["raw_file", "video_hash"])
-            transaction.on_commit(
-                lambda: _cleanup_raw_assets(
-                    video_uuid=video.uuid, raw_file_path=original_raw_file_path_to_delete, raw_frame_dir=original_raw_frame_dir_to_delete
-                )
-            )
+            transaction.on_commit(lambda: _cleanup_raw_assets(
+                video_uuid=video.uuid,
+                raw_file_path=original_raw_file_path_to_delete,
+                raw_frame_dir=original_raw_frame_dir_to_delete
+            ))
         video.save(update_fields=update_fields)
         video.state.mark_anonymized(save=True)
@@ -513,35 +523,21 @@ class VideoImportService:
             video = self.current_video
             if video is None:
                 self.logger.warning("No VideoFile instance available for fallback anonymization")
-            else:
-                # Try VideoFile.pipe_2() method if available
-                if hasattr(video, "pipe_2"):
-                    self.logger.info("Trying VideoFile.pipe_2() method...")
-                    if video.pipe_2():
-                        self.logger.info("VideoFile.pipe_2() succeeded")
-                        self.processing_context["anonymization_completed"] = True
-                        return
-                    self.logger.warning("VideoFile.pipe_2() returned False")
-                # Try direct anonymization via _anonymize
-                if _anonymize(video, delete_original_raw=self.delete_source):
-                    self.logger.info("VideoFile._anonymize() succeeded")
-                    self.processing_context["anonymization_completed"] = True
-                    return
             # Strategy 2: Simple copy (no processing, just copy raw to processed)
             self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
-            self.processing_context["anonymization_completed"] = False
-            self.processing_context["use_raw_as_processed"] = True
+            self.processing_context['anonymization_completed'] = False
+            self.processing_context['use_raw_as_processed'] = True
             self.logger.warning("Fallback: Video will be imported without anonymization (raw copy used)")
         except Exception as e:
             self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
-            self.processing_context["anonymization_completed"] = False
-            self.processing_context["error_reason"] = str(e)
+            self.processing_context['anonymization_completed'] = False
+            self.processing_context['error_reason'] = str(e)
     def _finalize_processing(self):
         """Finalize processing and update video state."""
         self.logger.info("Updating video processing state...")
         with transaction.atomic():
             video = self._require_current_video()
             try:
@@ -550,33 +546,36 @@ class VideoImportService:
                 self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
             state = video.get_or_create_state()
             # Only mark frames as extracted if they were successfully extracted
-            if self.processing_context.get("frames_extracted", False):
+            if self.processing_context.get('frames_extracted', False):
                 state.frames_extracted = True
                 self.logger.info("Marked frames as extracted in state")
             else:
                 self.logger.warning("Frames were not extracted, not updating state")
             # Always mark these as true (metadata extraction attempts were made)
             state.frames_initialized = True
             state.video_meta_extracted = True
             state.text_meta_extracted = True
             # ✅ FIX: Only mark as processed if anonymization actually completed
-            anonymization_completed = self.processing_context.get("anonymization_completed", False)
+            anonymization_completed = self.processing_context.get('anonymization_completed', False)
             if anonymization_completed:
                 state.mark_sensitive_meta_processed(save=False)
                 self.logger.info("Anonymization completed - marking sensitive meta as processed")
             else:
-                self.logger.warning(f"Anonymization NOT completed - NOT marking as processed. Reason: {self.processing_context.get('error_reason', 'Unknown')}")
+                self.logger.warning(
+                    "Anonymization NOT completed - NOT marking as processed. "
+                    f"Reason: {self.processing_context.get('error_reason', 'Unknown')}"
+                )
                 # Explicitly mark as NOT processed
                 state.sensitive_meta_processed = False
             # Save all state changes
             state.save()
             self.logger.info("Video processing state updated")
         # Signal completion
         self._signal_completion()
@@ -590,12 +589,12 @@ class VideoImportService:
         video = self._require_current_video()
         processed_video_path = None
-        if "cleaned_video_path" in self.processing_context:
-            processed_video_path = self.processing_context["cleaned_video_path"]
+        if 'cleaned_video_path' in self.processing_context:
+            processed_video_path = self.processing_context['cleaned_video_path']
         else:
-            raw_video_path = self.processing_context.get("raw_video_path")
+            raw_video_path = self.processing_context.get('raw_video_path')
             if raw_video_path and Path(raw_video_path).exists():
-                video_filename = self.processing_context.get("video_filename", Path(raw_video_path).name)
+                video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
                 processed_filename = f"processed_{video_filename}"
                 processed_video_path = Path(raw_video_path).parent / processed_filename
                 try:
@@ -624,13 +623,13 @@ class VideoImportService:
                     except Exception as exc:
                         self.logger.error("Failed to update processed_file path: %s", exc)
                         video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
-                        video.save(update_fields=["processed_file"])
+                        video.save(update_fields=['processed_file'])
                         self.logger.info(
                             "Updated processed_file path using fallback: %s",
                             f"anonym_videos/{anonym_video_filename}",
                         )
-                    self.processing_context["anonymization_completed"] = True
+                    self.processing_context['anonymization_completed'] = True
                 else:
                     self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
             except Exception as exc:
@@ -640,14 +639,13 @@ class VideoImportService:
         try:
             from endoreg_db.utils.paths import RAW_FRAME_DIR
             shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
             self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
         except Exception as exc:
             self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
-        source_path = self.processing_context["file_path"]
-        if self.processing_context["delete_source"] and Path(source_path).exists():
+        source_path = self.processing_context['file_path']
+        if self.processing_context['delete_source'] and Path(source_path).exists():
             try:
                 os.remove(source_path)
                 self.logger.info("Removed remaining source file: %s", source_path)
@@ -658,25 +656,25 @@ class VideoImportService:
             self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
             try:
                 video.anonymize(delete_original_raw=self.delete_source)
-                video.save(update_fields=["processed_file"])
+                video.save(update_fields=['processed_file'])
                 self.logger.info("Late-stage anonymization succeeded")
             except Exception as e:
                 self.logger.error("Late-stage anonymization failed: %s", e)
-                self.processing_context["anonymization_completed"] = False
+                self.processing_context['anonymization_completed'] = False
         self.logger.info("Cleanup and archiving completed")
-        self.processed_files.add(str(self.processing_context["file_path"]))
+        self.processed_files.add(str(self.processing_context['file_path']))
         with transaction.atomic():
             video.refresh_from_db()
-            if hasattr(video, "state") and self.processing_context.get("anonymization_completed"):
+            if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
                 video.state.mark_sensitive_meta_processed(save=True)
         self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
         self.logger.info("Raw video stored in: /data/videos")
         self.logger.info("Processed video stored in: /data/anonym_videos")
     def _create_sensitive_file(
         self,
         video_instance: VideoFile | None = None,
@@ -739,7 +737,7 @@ class VideoImportService:
         self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
         return target_file_path
-    def _get_processor_roi_info(self) -> Tuple[Optional[Any], Optional[Dict[str, Any]]]:
+    def _get_processor_roi_info(self) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
         """Get processor ROI information for masking."""
         endoscope_data_roi_nested = None
         endoscope_image_roi = None
@@ -813,67 +811,73 @@ class VideoImportService:
                 except Exception as exc:
                     self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
     def _ensure_frame_cleaning_available(self):
         """
         Ensure frame cleaning modules are available by adding lx-anonymizer to path.
         Returns:
             Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
         """
         try:
             # Check if we can find the lx-anonymizer directory
             from importlib import resources
             lx_anonymizer_path = resources.files("lx_anonymizer")
             # make sure lx_anonymizer_path is a Path object
             lx_anonymizer_path = Path(str(lx_anonymizer_path))
             if lx_anonymizer_path.exists():
                 # Add to Python path temporarily
                 if str(lx_anonymizer_path) not in sys.path:
                     sys.path.insert(0, str(lx_anonymizer_path))
                 # Try simple import
                 from lx_anonymizer import FrameCleaner, ReportReader
                 self.logger.info("Successfully imported lx_anonymizer modules")
                 # Remove from path to avoid conflicts
                 if str(lx_anonymizer_path) in sys.path:
                     sys.path.remove(str(lx_anonymizer_path))
                 return True, FrameCleaner, ReportReader
             else:
-                self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
+                self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
         except Exception as e:
             self.logger.warning(f"Frame cleaning not available: {e}")
         return False, None, None
     def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
         """Perform frame cleaning and anonymization."""
         # Instantiate frame cleaner
         frame_cleaner = FrameCleaner()
         # Prepare parameters for frame cleaning
-        raw_video_path = self.processing_context.get("raw_video_path")
+        raw_video_path = self.processing_context.get('raw_video_path')
         if not raw_video_path or not Path(raw_video_path).exists():
             raise RuntimeError(f"Raw video path not found: {raw_video_path}")
-        # Create temporary output path for cleaned video
+        # Get processor name safely
         video = self._require_current_video()
-        video_filename = self.processing_context.get("video_filename", Path(raw_video_path).name)
+        video_meta = getattr(video, "video_meta", None)
+        processor = getattr(video_meta, "processor", None) if video_meta else None
+        device_name = processor.name if processor else self.processing_context['processor_name']
+        # Create temporary output path for cleaned video
+        video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
         cleaned_filename = f"cleaned_{video_filename}"
         cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
-        processor_roi, endoscope_roi = self._get_processor_roi_info()
-        # Processor roi can be used later to OCR preknown regions.
+        # Processor roi is used later to OCR preknown regions.
         # Clean video with ROI masking (heavy I/O operation)
         actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
             video_path=Path(raw_video_path),
@@ -881,39 +885,18 @@ class VideoImportService:
             endoscope_image_roi=endoscope_image_roi,
             endoscope_data_roi_nested=endoscope_data_roi_nested,
             output_path=cleaned_video_path,
-            technique="mask_overlay",
+            technique="mask_overlay"
         )
-        # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
-        try:
-            # Prefer frames belonging to this video (UUID in path), else pick any frame
-            frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
-            video_uuid = str(video.uuid)
-            filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
-            if filtered:
-                sample_frame = random.choice(filtered)
-                ocr_text = trocr_full_image_ocr(sample_frame)
-                if ocr_text:
-                    llm_metadata = frame_cleaner.extract_metadata(ocr_text)
-                    if llm_metadata:
-                        # Merge with already extracted frame-level metadata
-                        extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(extracted_metadata or {}, llm_metadata)
-                        self.logger.info("LLM metadata extraction (random frame) successful")
-                    else:
-                        self.logger.info("LLM metadata extraction (random frame) found no data")
-                else:
-                    self.logger.info("No text extracted by TrOCR on random frame")
-        except Exception as e:
-            self.logger.error(f"LLM metadata enrichment step failed: {e}")
         # Store cleaned video path for later use in _cleanup_and_archive
-        self.processing_context["cleaned_video_path"] = actual_cleaned_path
-        self.processing_context["extracted_metadata"] = extracted_metadata
+        self.processing_context['cleaned_video_path'] = actual_cleaned_path
+        self.processing_context['extracted_metadata'] = extracted_metadata
         # Update sensitive metadata with extracted information
         self._update_sensitive_metadata(extracted_metadata)
         self.logger.info(f"Extracted metadata from frame cleaning: {extracted_metadata}")
         self.logger.info(f"Frame cleaning with ROI masking completed: {actual_cleaned_path}")
         self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
@@ -931,13 +914,13 @@ class VideoImportService:
         sm = sensitive_meta
         updated_fields = []
         try:
             sm.update_from_dict(extracted_metadata)
             updated_fields = list(extracted_metadata.keys())
         except KeyError as e:
             self.logger.warning(f"Failed to update SensitiveMeta field {e}")
         if updated_fields:
             sm.save(update_fields=updated_fields)
             self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
@@ -961,18 +944,22 @@ class VideoImportService:
                 except (ValueError, OSError):
                     raw_exists = False
-            video_processing_complete = video.sensitive_meta is not None and video.video_meta is not None and raw_exists
+            video_processing_complete = (
+                video.sensitive_meta is not None and
+                video.video_meta is not None and
+                raw_exists
+            )
             if video_processing_complete:
                 self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
                 # Update completion flags if they exist
                 completion_fields = []
-                for field_name in ["import_completed", "processing_complete", "ready_for_validation"]:
+                for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
                     if hasattr(video, field_name):
                         setattr(video, field_name, True)
                         completion_fields.append(field_name)
                 if completion_fields:
                     video.save(update_fields=completion_fields)
                     self.logger.info("Updated completion flags: %s", completion_fields)
@@ -981,15 +968,15 @@ class VideoImportService:
                     "Video %s processing incomplete - missing required components",
                     video.uuid,
                 )
         except Exception as e:
             self.logger.warning(f"Failed to signal completion status: {e}")
     def _cleanup_on_error(self):
         """Cleanup processing context on error."""
-        if self.current_video and hasattr(self.current_video, "state"):
+        if self.current_video and hasattr(self.current_video, 'state'):
             try:
-                if self.processing_context.get("processing_started"):
+                if self.processing_context.get('processing_started'):
                     self.current_video.state.frames_extracted = False
                     self.current_video.state.frames_initialized = False
                     self.current_video.state.video_meta_extracted = False
@@ -1001,28 +988,29 @@ class VideoImportService:
     def _cleanup_processing_context(self):
         """
         Cleanup processing context and release file lock.
         This method is always called in the finally block of import_and_anonymize()
         to ensure the file lock is released even if processing fails.
         """
         try:
             # Release file lock if it was acquired
-            lock_context = self.processing_context.get("_lock_context")
+            lock_context = self.processing_context.get('_lock_context')
             if lock_context is not None:
                 try:
                     lock_context.__exit__(None, None, None)
                     self.logger.info("Released file lock")
                 except Exception as e:
                     self.logger.warning(f"Error releasing file lock: {e}")
             # Remove file from processed set if processing failed
-            file_path = self.processing_context.get("file_path")
-            if file_path and not self.processing_context.get("anonymization_completed"):
+            file_path = self.processing_context.get('file_path')
+            if file_path and not self.processing_context.get('anonymization_completed'):
                 file_path_str = str(file_path)
                 if file_path_str in self.processed_files:
                     self.processed_files.remove(file_path_str)
                     self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
         except Exception as e:
             self.logger.warning(f"Error during context cleanup: {e}")
         finally:
@@ -1030,7 +1018,6 @@ class VideoImportService:
             self.current_video = None
             self.processing_context = {}
 # Convenience function for callers/tests that expect a module-level import_and_anonymize
 def import_and_anonymize(
     file_path,
@@ -1049,4 +1036,4 @@ def import_and_anonymize(
         processor_name=processor_name,
         save_video=save_video,
         delete_source=delete_source,
-    )
+    )

{endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: endoreg-db
-Version: 0.8.2.8
+Version: 0.8.3.0
 Summary: EndoReg Db Django App
 Project-URL: Homepage, https://info.coloreg.de
 Project-URL: Repository, https://github.com/wg-lux/endoreg-db
@@ -29,10 +29,11 @@ Requires-Dist: dotenv>=0.9.9
 Requires-Dist: faker>=37.6.0
 Requires-Dist: flake8>=7.3.0
 Requires-Dist: gunicorn>=23.0.0
+Requires-Dist: huggingface-hub>=0.35.3
 Requires-Dist: icecream>=2.1.4
 Requires-Dist: librosa==0.11.0
 Requires-Dist: llvmlite>=0.44.0
-Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.5
+Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.7
 Requires-Dist: moviepy==2.2.1
 Requires-Dist: mypy>=1.16.0
 Requires-Dist: numpy>=2.2.3

{endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/RECORD RENAMED Viewed

@@ -384,7 +384,7 @@ endoreg_db/models/media/pdf/report_reader/report_reader_config.py,sha256=wYVDmPS
 endoreg_db/models/media/pdf/report_reader/report_reader_flag.py,sha256=j9tjbLRenxpWfeaseALl8rV2Dqem9YaM_duS1iJkARU,536
 endoreg_db/models/media/video/__init__.py,sha256=ifW4SXXN2q6wAuFwSP7XlYskpX7UX6uy0py5mpCCOCM,211
 endoreg_db/models/media/video/create_from_file.py,sha256=3n4bbzFteEOFDUuEikP0x-StCKI5R5IhyKC7o3kLZ6Y,15128
-endoreg_db/models/media/video/pipe_1.py,sha256=hOII3BCiMpxgpDKpV5h52-O2XTqKlb4YArxuH2fWyck,9402
+endoreg_db/models/media/video/pipe_1.py,sha256=yUzTi0pkw2ISsOoFpLmNky6S_V-TEWMxXmPLfB7gUpA,9899
 endoreg_db/models/media/video/pipe_2.py,sha256=DnMxW0uOqSsf7-0n9Rlvn7u89U4Jpkv7n6hFpQfUjkQ,4964
 endoreg_db/models/media/video/refactor_plan.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 endoreg_db/models/media/video/video_file.py,sha256=txlxR8d1OBgt3UEkWvLcGSyLarh0jXLw-z0SAV5KOok,26789
@@ -461,8 +461,8 @@ endoreg_db/models/medical/risk/risk.py,sha256=g5pgAfCfsvH88nbmX3xsASF3OZgNA-G6NJ
 endoreg_db/models/medical/risk/risk_type.py,sha256=kEugcaWSTEWH_Vxq4dcF80Iv1L4_Kk1JKJGQMgz_s0o,1350
 endoreg_db/models/metadata/__init__.py,sha256=8I6oLj3YTmeaPGJpL0AWG5gLwp38QzrEggxSkTisv7c,474
 endoreg_db/models/metadata/frame_ocr_result.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-endoreg_db/models/metadata/model_meta.py,sha256=aZH6Bz5Ss874Knvg1b3Kgq6gU8kVzPHXneunZJNF4yw,7111
-endoreg_db/models/metadata/model_meta_logic.py,sha256=yiIWbxxykUp6VB_7imRqSXcO0RS5GuoYP83O48TyKws,8987
+endoreg_db/models/metadata/model_meta.py,sha256=F_r-PTLeNi4J-4EaGCQkGIguhdl7Bwba7_i56ZAjc-4,7589
+endoreg_db/models/metadata/model_meta_logic.py,sha256=27mqScxUTJXNUVc6CqAs5dXjspEsh0TWPmlxdJVulGc,12015
 endoreg_db/models/metadata/pdf_meta.py,sha256=BTmpSgqxmPKi0apcNjyrZAS4AFKCPXVdBd6VBeyyv6E,3174
 endoreg_db/models/metadata/sensitive_meta.py,sha256=ekLHrW-b5uYcjfkRd0EW5ncx5ef8Bu-K6msDkpWCAbk,13034
 endoreg_db/models/metadata/sensitive_meta_logic.py,sha256=Oh7ssZQEPfKGfRMF5nXKJpOIxXx-Xibd3rpOu-bQilk,29988
@@ -600,7 +600,7 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
 endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
 endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
 endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
-endoreg_db/services/video_import.py,sha256=NhQ9eJRUUNo9-j6c6ru921xt-oBgGMY0KN2zsRpskGI,48239
+endoreg_db/services/video_import.py,sha256=PhcOgxU5M4uSEklBXEWHpIaNX-yIYv1rJy-T-fCU8cs,47830
 endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
 endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
 endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
@@ -784,7 +784,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
 endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
 endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
 endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
-endoreg_db-0.8.2.8.dist-info/METADATA,sha256=uKCJOlsyIekOWKARAh7y3eO9Ch5KBe0RmcgRM38o4UE,14719
-endoreg_db-0.8.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-endoreg_db-0.8.2.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-endoreg_db-0.8.2.8.dist-info/RECORD,,
+endoreg_db-0.8.3.0.dist-info/METADATA,sha256=q7jvhqzrBQmwSOuzXARYftJxbQ5vBUL_zmJG9U338dA,14758
+endoreg_db-0.8.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+endoreg_db-0.8.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+endoreg_db-0.8.3.0.dist-info/RECORD,,

{endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

endoreg-db 0.8.2.8__py3-none-any.whl → 0.8.3.0__py3-none-any.whl

Potentially problematic release.

endoreg-db 0.8.2.8py3-none-any.whl → 0.8.3.0py3-none-any.whl