endoreg-db 0.8.2.4__py3-none-any.whl → 0.8.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

@@ -16,15 +16,19 @@ import shutil
16
16
  import time
17
17
  from contextlib import contextmanager
18
18
  from pathlib import Path
19
- from typing import Union, Dict, Any, Optional
19
+ from typing import Union, Dict, Any, Optional, List, Tuple
20
20
  from django.db import transaction
21
21
  from endoreg_db.models import VideoFile, SensitiveMeta
22
22
  from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
23
23
  import random
24
24
  from lx_anonymizer.ocr import trocr_full_image_ocr
25
25
  from endoreg_db.utils.hashs import get_video_hash
26
- from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
26
+ from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
27
+ from typing import TYPE_CHECKING
28
+ from django.db.models.fields.files import FieldFile
27
29
 
30
+ if TYPE_CHECKING:
31
+ from endoreg_db.models import EndoscopyProcessor
28
32
 
29
33
  # File lock configuration (matches PDF import)
30
34
  STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
@@ -59,12 +63,18 @@ class VideoImportService():
59
63
  self.STORAGE_DIR = STORAGE_DIR
60
64
 
61
65
  # Central video instance and processing context
62
- self.current_video = None
66
+ self.current_video: Optional[VideoFile] = None
63
67
  self.processing_context: Dict[str, Any] = {}
64
68
 
65
69
  self.delete_source = False
66
70
 
67
71
  self.logger = logging.getLogger(__name__)
72
+
73
+ def _require_current_video(self) -> VideoFile:
74
+ """Return the current VideoFile or raise if it has not been initialized."""
75
+ if self.current_video is None:
76
+ raise RuntimeError("Current video instance is not set")
77
+ return self.current_video
68
78
 
69
79
  @contextmanager
70
80
  def _file_lock(self, path: Path):
@@ -156,6 +166,9 @@ class VideoImportService():
156
166
  return None
157
167
  raise
158
168
 
169
+ # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
170
+ self._create_sensitive_file()
171
+
159
172
  # Create or retrieve video instance
160
173
  self._create_or_retrieve_video_instance()
161
174
 
@@ -355,27 +368,25 @@ class VideoImportService():
355
368
 
356
369
  def _setup_processing_environment(self):
357
370
  """Setup the processing environment without file movement."""
358
- # Ensure we have a valid video instance
359
- if not self.current_video:
360
- raise RuntimeError("No video instance available for processing environment setup")
361
-
371
+ video = self._require_current_video()
372
+
362
373
  # Initialize video specifications
363
- self.current_video.initialize_video_specs()
364
-
374
+ video.initialize_video_specs()
375
+
365
376
  # Initialize frame objects in database
366
- self.current_video.initialize_frames()
377
+ video.initialize_frames()
367
378
 
368
379
  # Extract frames BEFORE processing to prevent pipeline 1 conflicts
369
380
  self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
370
381
  try:
371
- frames_extracted = self.current_video.extract_frames(overwrite=False)
382
+ frames_extracted = video.extract_frames(overwrite=False)
372
383
  if frames_extracted:
373
384
  self.processing_context['frames_extracted'] = True
374
385
  self.logger.info("Frame extraction completed successfully")
375
386
 
376
387
  # CRITICAL: Immediately save the frames_extracted state to database
377
388
  # to prevent refresh_from_db() in pipeline 1 from overriding it
378
- state = self.current_video.get_or_create_state()
389
+ state = video.get_or_create_state()
379
390
  if not state.frames_extracted:
380
391
  state.frames_extracted = True
381
392
  state.save(update_fields=['frames_extracted'])
@@ -388,7 +399,7 @@ class VideoImportService():
388
399
  self.processing_context['frames_extracted'] = False
389
400
 
390
401
  # Ensure default patient data
391
- self._ensure_default_patient_data()
402
+ self._ensure_default_patient_data(video_instance=video)
392
403
 
393
404
  self.logger.info("Processing environment setup completed")
394
405
 
@@ -396,11 +407,12 @@ class VideoImportService():
396
407
  """Process frames and extract metadata with anonymization."""
397
408
  # Check frame cleaning availability
398
409
  frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
399
-
400
- _current_video = self.current_video
401
- assert _current_video is not None, "Current video instance is None during frame processing"
410
+ video = self._require_current_video()
411
+
412
+ raw_file_field = video.raw_file
413
+ has_raw_file = isinstance(raw_file_field, FieldFile) and bool(raw_file_field.name)
402
414
 
403
- if not (frame_cleaning_available and _current_video.raw_file):
415
+ if not (frame_cleaning_available and has_raw_file):
404
416
  self.logger.warning("Frame cleaning not available or conditions not met, using fallback anonymization.")
405
417
  self._fallback_anonymize_video()
406
418
  return
@@ -409,13 +421,13 @@ class VideoImportService():
409
421
  self.logger.info("Starting frame-level anonymization with processor ROI masking...")
410
422
 
411
423
  # Get processor ROI information
412
- processor_roi, endoscope_roi = self._get_processor_roi_info()
424
+ endoscope_data_roi_nested, endoscope_image_roi = self._get_processor_roi_info()
413
425
 
414
426
  # Perform frame cleaning with timeout to prevent blocking
415
427
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
416
428
 
417
429
  with ThreadPoolExecutor(max_workers=1) as executor:
418
- future = executor.submit(self._perform_frame_cleaning, FrameCleaner, processor_roi, endoscope_roi)
430
+ future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
419
431
  try:
420
432
  # Increased timeout to better accommodate ffmpeg + OCR
421
433
  future.result(timeout=300)
@@ -427,14 +439,20 @@ class VideoImportService():
427
439
  raw_video_path = self.processing_context.get('raw_video_path')
428
440
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
429
441
  grace_seconds = 60
430
- expected_cleaned = self.current_video.processed_file
442
+ expected_cleaned_path: Optional[Path] = None
443
+ processed_field = video.processed_file
444
+ if isinstance(processed_field, FieldFile) and processed_field.name:
445
+ try:
446
+ expected_cleaned_path = Path(processed_field.path)
447
+ except (NotImplementedError, TypeError, ValueError):
448
+ expected_cleaned_path = None
431
449
  found = False
432
- if expected_cleaned is not None:
450
+ if expected_cleaned_path is not None:
433
451
  for _ in range(grace_seconds):
434
- if expected_cleaned.exists():
435
- self.processing_context['cleaned_video_path'] = expected_cleaned
452
+ if expected_cleaned_path.exists():
453
+ self.processing_context['cleaned_video_path'] = expected_cleaned_path
436
454
  self.processing_context['anonymization_completed'] = True
437
- self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned)
455
+ self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
438
456
  found = True
439
457
  break
440
458
  time.sleep(1)
@@ -442,7 +460,7 @@ class VideoImportService():
442
460
  self._fallback_anonymize_video()
443
461
  if not found:
444
462
  raise TimeoutError("Frame cleaning operation timed out - likely Ollama connection issue")
445
-
463
+
446
464
  except Exception as e:
447
465
  self.logger.warning("Frame cleaning failed (reason: %s), falling back to simple copy", e)
448
466
  # Try fallback anonymization when frame cleaning fails
@@ -455,17 +473,20 @@ class VideoImportService():
455
473
  self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
456
474
 
457
475
  def _save_anonymized_video(self):
458
- anonymized_video_path = self.current_video.get_target_anonymized_video_path()
459
-
476
+ video = self._require_current_video()
477
+ anonymized_video_path = video.get_target_anonymized_video_path()
478
+
460
479
  if not anonymized_video_path.exists():
461
- raise RuntimeError(f"Processed video file not found after assembly for {self.current_video.uuid}: {anonymized_video_path}")
480
+ raise RuntimeError(f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}")
462
481
 
463
482
  new_processed_hash = get_video_hash(anonymized_video_path)
464
- if type(self.current_video).objects.filter(processed_video_hash=new_processed_hash).exclude(pk=self.current_video.pk).exists():
465
- raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {self.current_video.uuid}).")
483
+ if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
484
+ raise ValueError(
485
+ f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
486
+ )
466
487
 
467
- self.current_video.processed_video_hash = new_processed_hash
468
- self.current_video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
488
+ video.processed_video_hash = new_processed_hash
489
+ video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
469
490
 
470
491
  update_fields = [
471
492
  "processed_video_hash",
@@ -474,22 +495,23 @@ class VideoImportService():
474
495
  ]
475
496
 
476
497
  if self.delete_source:
477
- original_raw_file_path_to_delete = self.current_video.get_raw_file_path()
478
- original_raw_frame_dir_to_delete = self.current_video.get_frame_dir_path()
498
+ original_raw_file_path_to_delete = video.get_raw_file_path()
499
+ original_raw_frame_dir_to_delete = video.get_frame_dir_path()
479
500
 
480
- self.current_video.raw_file.name = None
501
+ video.raw_file.name = None # type: ignore[assignment]
481
502
 
482
503
  update_fields.extend(["raw_file", "video_hash"])
483
504
 
484
505
  transaction.on_commit(lambda: _cleanup_raw_assets(
485
- video_uuid=self.current_video.uuid,
506
+ video_uuid=video.uuid,
486
507
  raw_file_path=original_raw_file_path_to_delete,
487
508
  raw_frame_dir=original_raw_frame_dir_to_delete
488
509
  ))
489
510
 
490
- self.current_video.save(update_fields=update_fields)
491
- self.current_video.state.mark_anonymized(save=True)
492
- self.current_video.refresh_from_db()
511
+ video.save(update_fields=update_fields)
512
+ video.state.mark_anonymized(save=True)
513
+ video.refresh_from_db()
514
+ self.current_video = video
493
515
  return True
494
516
 
495
517
  def _fallback_anonymize_video(self):
@@ -498,23 +520,23 @@ class VideoImportService():
498
520
  """
499
521
  try:
500
522
  self.logger.info("Attempting fallback video anonymization...")
501
- if self.current_video:
523
+ video = self.current_video
524
+ if video is None:
525
+ self.logger.warning("No VideoFile instance available for fallback anonymization")
526
+ else:
502
527
  # Try VideoFile.pipe_2() method if available
503
- if hasattr(self.current_video, 'pipe_2'):
528
+ if hasattr(video, 'pipe_2'):
504
529
  self.logger.info("Trying VideoFile.pipe_2() method...")
505
- if self.current_video.pipe_2():
530
+ if video.pipe_2():
506
531
  self.logger.info("VideoFile.pipe_2() succeeded")
507
532
  self.processing_context['anonymization_completed'] = True
508
533
  return
509
- else:
510
- self.logger.warning("VideoFile.pipe_2() returned False")
534
+ self.logger.warning("VideoFile.pipe_2() returned False")
511
535
  # Try direct anonymization via _anonymize
512
- if _anonymize(self.current_video, delete_original_raw=self.delete_source):
536
+ if _anonymize(video, delete_original_raw=self.delete_source):
513
537
  self.logger.info("VideoFile._anonymize() succeeded")
514
538
  self.processing_context['anonymization_completed'] = True
515
539
  return
516
- else:
517
- self.logger.warning("No VideoFile instance available for fallback anonymization")
518
540
 
519
541
  # Strategy 2: Simple copy (no processing, just copy raw to processed)
520
542
  self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
@@ -524,33 +546,19 @@ class VideoImportService():
524
546
  except Exception as e:
525
547
  self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
526
548
  self.processing_context['anonymization_completed'] = False
527
- self.processing_context['error_reason']
549
+ self.processing_context['error_reason'] = str(e)
528
550
  def _finalize_processing(self):
529
551
  """Finalize processing and update video state."""
530
552
  self.logger.info("Updating video processing state...")
531
553
 
532
554
  with transaction.atomic():
533
- # Update basic processing states
534
- # Ensure state exists before accessing it
555
+ video = self._require_current_video()
556
+ try:
557
+ video.refresh_from_db()
558
+ except Exception as refresh_error:
559
+ self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
535
560
 
536
- if not self.current_video:
537
- try:
538
- self.current_video.refresh_from_db()
539
- except Exception as e:
540
- self.logger.error(f"Failed to refresh current_video from DB: {e}")
541
- if not self.current_video:
542
- raise RuntimeError("No current video instance available for finalization")
543
-
544
- if not self.current_video.processed_file:
545
- self.logger.warning("No processed file available for current video")
546
- self.current_video.processed_file = None # Ensure field is not None
547
- self.current_video.mark_sensitive_meta_processed = False
548
- else:
549
- self.current_video.mark_sensitive_meta_processed = True
550
-
551
- state = self.current_video.get_or_create_state()
552
- if not state:
553
- raise RuntimeError("Failed to get or create video state")
561
+ state = video.get_or_create_state()
554
562
 
555
563
  # Only mark frames as extracted if they were successfully extracted
556
564
  if self.processing_context.get('frames_extracted', False):
@@ -579,10 +587,7 @@ class VideoImportService():
579
587
 
580
588
  # Save all state changes
581
589
  state.save()
582
- self.logger.info("Video processing state updated")
583
- # Save all state changes
584
- self.current_video.state.save()
585
- self.current_video.save()
590
+ self.logger.info("Video processing state updated")
586
591
 
587
592
  # Signal completion
588
593
  self._signal_completion()
@@ -590,59 +595,48 @@ class VideoImportService():
590
595
  def _cleanup_and_archive(self):
591
596
  """Move processed video to anonym_videos and cleanup."""
592
597
  from endoreg_db.utils import data_paths
593
-
594
- # Define target directory for processed videos
598
+
595
599
  anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
596
600
  anonym_videos_dir.mkdir(parents=True, exist_ok=True)
597
-
598
- # Check if we have a processed/cleaned video
601
+
602
+ video = self._require_current_video()
603
+
599
604
  processed_video_path = None
600
-
601
- # Look for cleaned video from frame cleaning process
602
605
  if 'cleaned_video_path' in self.processing_context:
603
606
  processed_video_path = self.processing_context['cleaned_video_path']
604
607
  else:
605
- # If no processing occurred, copy from raw video location
606
608
  raw_video_path = self.processing_context.get('raw_video_path')
607
609
  if raw_video_path and Path(raw_video_path).exists():
608
610
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
609
611
  processed_filename = f"processed_{video_filename}"
610
612
  processed_video_path = Path(raw_video_path).parent / processed_filename
611
-
612
- # Copy raw to processed location (will be moved to anonym_videos)
613
613
  try:
614
614
  shutil.copy2(str(raw_video_path), str(processed_video_path))
615
615
  self.logger.info("Copied raw video for processing: %s", processed_video_path)
616
- except Exception as e:
617
- self.logger.error("Failed to copy raw video: %s", e)
618
- processed_video_path = None # FIXED: Don't use raw as fallback
619
-
620
- # Move processed video to anonym_videos ONLY if it exists
616
+ except Exception as exc:
617
+ self.logger.error("Failed to copy raw video: %s", exc)
618
+ processed_video_path = None
619
+
621
620
  if processed_video_path and Path(processed_video_path).exists():
622
621
  try:
623
- # ✅ Clean filename: no original filename leakage
624
622
  ext = Path(processed_video_path).suffix or ".mp4"
625
- anonym_video_filename = f"anonym_{self.current_video.uuid}{ext}"
623
+ anonym_video_filename = f"anonym_{video.uuid}{ext}"
626
624
  anonym_target_path = anonym_videos_dir / anonym_video_filename
627
625
 
628
- # Move processed video to anonym_videos/
629
626
  shutil.move(str(processed_video_path), str(anonym_target_path))
630
627
  self.logger.info("Moved processed video to: %s", anonym_target_path)
631
628
 
632
- # Verify the file actually exists before updating database
633
629
  if anonym_target_path.exists():
634
630
  try:
635
631
  storage_root = data_paths["storage"]
636
632
  relative_path = anonym_target_path.relative_to(storage_root)
637
- # Save relative path (e.g. anonym_videos/anonym_<uuid>.mp4)
638
- self.current_video.processed_file.name = str(relative_path)
639
- self.current_video.save(update_fields=["processed_file"])
633
+ video.processed_file.name = str(relative_path)
634
+ video.save(update_fields=["processed_file"])
640
635
  self.logger.info("Updated processed_file path to: %s", relative_path)
641
- except Exception as e:
642
- self.logger.error("Failed to update processed_file path: %s", e)
643
- # Fallback to simple relative path
644
- self.current_video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
645
- self.current_video.save(update_fields=['processed_file'])
636
+ except Exception as exc:
637
+ self.logger.error("Failed to update processed_file path: %s", exc)
638
+ video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
639
+ video.save(update_fields=['processed_file'])
646
640
  self.logger.info(
647
641
  "Updated processed_file path using fallback: %s",
648
642
  f"anonym_videos/{anonym_video_filename}",
@@ -651,277 +645,185 @@ class VideoImportService():
651
645
  self.processing_context['anonymization_completed'] = True
652
646
  else:
653
647
  self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
654
- except Exception as e:
655
- self.logger.error("Failed to move processed video to anonym_videos: %s", e)
648
+ except Exception as exc:
649
+ self.logger.error("Failed to move processed video to anonym_videos: %s", exc)
656
650
  else:
657
651
  self.logger.warning("No processed video available - processed_file will remain empty")
658
- # Leave processed_file empty/null - frontend should fall back to raw_file
659
-
660
- # Cleanup temporary directories
652
+
661
653
  try:
662
654
  from endoreg_db.utils.paths import RAW_FRAME_DIR
663
655
  shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
664
656
  self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
665
- except Exception as e:
666
- self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, e)
667
-
668
- # Handle source file deletion - this should already be moved, but check raw_videos
657
+ except Exception as exc:
658
+ self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
659
+
669
660
  source_path = self.processing_context['file_path']
670
661
  if self.processing_context['delete_source'] and Path(source_path).exists():
671
662
  try:
672
663
  os.remove(source_path)
673
664
  self.logger.info("Removed remaining source file: %s", source_path)
674
- except Exception as e:
675
- self.logger.warning("Failed to remove source file %s: %s", source_path, e)
676
-
677
- # Check if processed video exists and otherwise call anonymize
678
-
679
- if not self.current_video.processed_file or not Path(self.current_video.processed_file.path).exists():
665
+ except Exception as exc:
666
+ self.logger.warning("Failed to remove source file %s: %s", source_path, exc)
667
+
668
+ if not video.processed_file or not Path(video.processed_file.path).exists():
680
669
  self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
681
- self.current_video.anonymize(delete_original_raw=self.delete_source)
682
- self.current_video.save(update_fields=['processed_file'])
683
-
684
-
670
+ try:
671
+ video.anonymize(delete_original_raw=self.delete_source)
672
+ video.save(update_fields=['processed_file'])
673
+ self.logger.info("Late-stage anonymization succeeded")
674
+ except Exception as e:
675
+ self.logger.error("Late-stage anonymization failed: %s", e)
676
+ self.processing_context['anonymization_completed'] = False
677
+
685
678
  self.logger.info("Cleanup and archiving completed")
686
-
687
-
688
-
689
- # Mark as processed (in-memory tracking)
679
+
690
680
  self.processed_files.add(str(self.processing_context['file_path']))
691
-
692
- # Refresh from database and finalize state
681
+
693
682
  with transaction.atomic():
694
- self.current_video.refresh_from_db()
695
- if hasattr(self.current_video, 'state') and self.processing_context.get('anonymization_completed'):
696
- self.current_video.state.mark_sensitive_meta_processed(save=True)
697
-
698
-
699
- self.logger.info("Import and anonymization completed for VideoFile UUID: %s", self.current_video.uuid)
683
+ video.refresh_from_db()
684
+ if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
685
+ video.state.mark_sensitive_meta_processed(save=True)
686
+
687
+ self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
700
688
  self.logger.info("Raw video stored in: /data/videos")
701
689
  self.logger.info("Processed video stored in: /data/anonym_videos")
702
690
 
703
- def _create_sensitive_file(self, video_instance: "VideoFile" = None, file_path: Union[Path, str] = None) -> Path:
704
- """
705
- Create a sensitive file for the given video file by copying the original file and updating the path.
706
- Uses the central video instance and processing context if parameters not provided.
691
+ def _create_sensitive_file(
692
+ self,
693
+ video_instance: VideoFile | None = None,
694
+ file_path: Path | str | None = None,
695
+ ) -> Path:
696
+ """Create or move a sensitive copy of the raw video file inside storage."""
707
697
 
708
- Args:
709
- video_instance: Optional video instance, defaults to self.current_video
710
- file_path: Optional file path, defaults to processing_context['file_path']
698
+ video = video_instance or self._require_current_video()
711
699
 
712
- Returns:
713
- Path: The path to the created sensitive file.
714
- """
715
- video_file = video_instance or self.current_video
716
- # Always use the currently stored raw file path from the model to avoid deleting external source assets
717
- source_path = None
700
+ raw_field: FieldFile | None = getattr(video, "raw_file", None)
701
+ source_path: Path | None = None
718
702
  try:
719
- if video_file and hasattr(video_file, 'raw_file') and video_file.raw_file and hasattr(video_file.raw_file, 'path'):
720
- source_path = Path(video_file.raw_file.path)
703
+ if raw_field and raw_field.path:
704
+ source_path = Path(raw_field.path)
721
705
  except Exception:
722
706
  source_path = None
723
- # Fallback only if explicitly provided (do NOT default to processing_context input file)
707
+
724
708
  if source_path is None and file_path is not None:
725
709
  source_path = Path(file_path)
726
-
727
- if not video_file:
728
- raise ValueError("No video instance available for creating sensitive file")
729
- if not source_path:
710
+
711
+ if source_path is None:
730
712
  raise ValueError("No file path available for creating sensitive file")
731
-
732
- if not video_file.raw_file:
713
+ if not raw_field:
733
714
  raise ValueError("VideoFile must have a raw_file to create a sensitive file")
734
-
735
- # Ensure the target directory exists
736
- target_dir = VIDEO_DIR / 'sensitive'
715
+
716
+ target_dir = VIDEO_DIR / "sensitive"
737
717
  if not target_dir.exists():
738
- self.logger.info(f"Creating sensitive file directory: {target_dir}")
718
+ self.logger.info("Creating sensitive file directory: %s", target_dir)
739
719
  os.makedirs(target_dir, exist_ok=True)
740
-
741
- # Move the stored raw file into the sensitive directory within storage
720
+
742
721
  target_file_path = target_dir / source_path.name
743
722
  try:
744
- # Prefer a move within the storage to avoid extra disk usage. This does not touch external input files.
745
723
  shutil.move(str(source_path), str(target_file_path))
746
- self.logger.info(f"Moved raw file to sensitive directory: {target_file_path}")
747
- except Exception as e:
748
- # Fallback to copy if move fails (e.g., cross-device or permissions), then remove only the original stored raw file
749
- self.logger.warning(f"Failed to move raw file to sensitive dir, copying instead: {e}")
724
+ self.logger.info("Moved raw file to sensitive directory: %s", target_file_path)
725
+ except Exception as exc:
726
+ self.logger.warning("Failed to move raw file to sensitive dir, copying instead: %s", exc)
750
727
  shutil.copy(str(source_path), str(target_file_path))
751
728
  try:
752
- # Remove only the stored raw file copy; never touch external input paths here
753
729
  os.remove(source_path)
754
730
  except FileNotFoundError:
755
731
  pass
756
-
757
- # Update the model to point to the sensitive file location
758
- # Use relative path from storage root, like in create_from_file.py
732
+
759
733
  try:
760
734
  from endoreg_db.utils import data_paths
735
+
761
736
  storage_root = data_paths["storage"]
762
737
  relative_path = target_file_path.relative_to(storage_root)
763
- video_file.raw_file.name = str(relative_path)
764
- video_file.save(update_fields=['raw_file'])
765
- self.logger.info(f"Updated video.raw_file to point to sensitive location: {relative_path}")
766
- except Exception as e:
767
- # Fallback to absolute path conversion if relative path fails
768
- self.logger.warning(f"Failed to set relative path, using fallback: {e}")
769
- video_file.raw_file.name = f"videos/sensitive/{target_file_path.name}"
770
- video_file.save(update_fields=['raw_file'])
771
- self.logger.info(f"Updated video.raw_file using fallback method: videos/sensitive/{target_file_path.name}")
772
-
773
- # Important: Do NOT remove the original input asset passed to the service here.
774
- # Source file cleanup for external inputs is handled by create_from_file via delete_source flag.
775
-
776
- self.logger.info(f"Created sensitive file for {video_file.uuid} at {target_file_path}")
777
- return target_file_path
738
+ video.raw_file.name = str(relative_path)
739
+ video.save(update_fields=["raw_file"])
740
+ self.logger.info("Updated video.raw_file to point to sensitive location: %s", relative_path)
741
+ except Exception as exc:
742
+ self.logger.warning("Failed to set relative path, using fallback: %s", exc)
743
+ video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
744
+ video.save(update_fields=["raw_file"])
745
+ self.logger.info(
746
+ "Updated video.raw_file using fallback method: videos/sensitive/%s",
747
+ target_file_path.name,
748
+ )
778
749
 
750
+ self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
751
+ return target_file_path
779
752
 
753
+ def _get_processor_roi_info(self) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
754
+ """Get processor ROI information for masking."""
755
+ endoscope_data_roi_nested = None
756
+ endoscope_image_roi = None
780
757
 
758
+ video = self._require_current_video()
781
759
 
782
- def _ensure_frame_cleaning_available(self):
783
- """
784
- Ensure frame cleaning modules are available by adding lx-anonymizer to path.
785
-
786
- Returns:
787
- Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
788
- """
789
760
  try:
790
- # Check if we can find the lx-anonymizer directory
791
- from importlib import resources
792
- lx_anonymizer_path = resources.files("lx_anonymizer")
793
-
794
- if lx_anonymizer_path.exists():
795
- # Add to Python path temporarily
796
- if str(lx_anonymizer_path) not in sys.path:
797
- sys.path.insert(0, str(lx_anonymizer_path))
798
-
799
- # Try simple import
800
- from lx_anonymizer import FrameCleaner, ReportReader
801
-
802
- self.logger.info("Successfully imported lx_anonymizer modules")
803
-
804
- # Remove from path to avoid conflicts
805
- if str(lx_anonymizer_path) in sys.path:
806
- sys.path.remove(str(lx_anonymizer_path))
807
-
808
- return True, FrameCleaner, ReportReader
809
-
761
+ video_meta = getattr(video, "video_meta", None)
762
+ processor = getattr(video_meta, "processor", None) if video_meta else None
763
+ if processor:
764
+ assert isinstance(processor, EndoscopyProcessor), "Processor is not of type EndoscopyProcessor"
765
+ endoscope_image_roi = processor.get_roi_endoscope_image()
766
+ endoscope_data_roi_nested = processor.get_rois()
767
+ self.logger.info("Retrieved processor ROI information: endoscope_image_roi=%s", endoscope_image_roi)
810
768
  else:
811
- self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
812
-
813
- except Exception as e:
814
- self.logger.warning(f"Frame cleaning not available: {e}")
815
-
816
- return False, None, None
769
+ self.logger.warning(
770
+ "No processor found for video %s, proceeding without ROI masking",
771
+ video.uuid,
772
+ )
773
+ except Exception as exc:
774
+ self.logger.error("Failed to retrieve processor ROI information: %s", exc)
817
775
 
818
- def _get_processor_roi_info(self):
819
- """Get processor ROI information for masking."""
820
- processor_roi = None
821
- endoscope_roi = None
822
-
823
- try:
824
- if self.current_video.video_meta and self.current_video.video_meta.processor:
825
- processor = getattr(self.current_video.video_meta, "processor", None)
826
-
827
- # Get the endoscope ROI for masking
828
- endoscope_roi = processor.get_roi_endoscope_image()
829
-
830
- # Get all processor ROIs for comprehensive masking
831
- processor_roi = {
832
- 'endoscope_image': endoscope_roi,
833
- 'patient_first_name': processor.get_roi_patient_first_name(),
834
- 'patient_last_name': processor.get_roi_patient_last_name(),
835
- 'patient_dob': processor.get_roi_patient_dob(),
836
- 'examination_date': processor.get_roi_examination_date(),
837
- 'examination_time': processor.get_roi_examination_time(),
838
- 'endoscope_type': processor.get_roi_endoscope_type(),
839
- 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
840
- }
841
-
842
- self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
843
- else:
844
- self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
845
-
846
- except Exception as e:
847
- self.logger.error(f"Failed to retrieve processor ROI information: {e}")
848
- # Continue without ROI - don't fail the entire import process
849
-
850
- return processor_roi, endoscope_roi
776
+ return endoscope_data_roi_nested, endoscope_image_roi
851
777
 
778
+ def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
779
+ """Ensure minimum patient data is present on the video's SensitiveMeta."""
852
780
 
853
- def _ensure_default_patient_data(self, video_instance: "VideoFile" = None) -> None:
854
- """
855
- Ensure video has minimum required patient data in SensitiveMeta.
856
- Creates default values if data is missing after OCR processing.
857
- Uses the central video instance if parameter not provided.
858
-
859
- Args:
860
- video_instance: Optional video instance, defaults to self.current_video
861
- """
862
- video_file = video_instance or self.current_video
863
-
864
- if not video_file:
865
- raise ValueError("No video instance available for ensuring patient data")
866
-
867
- if not video_file.sensitive_meta:
868
- self.logger.info(f"No SensitiveMeta found for video {video_file.uuid}, creating default")
869
-
870
- # Create default SensitiveMeta with placeholder data
781
+ video = video_instance or self._require_current_video()
782
+
783
+ sensitive_meta = getattr(video, "sensitive_meta", None)
784
+ if not sensitive_meta:
785
+ self.logger.info("No SensitiveMeta found for video %s, creating default", video.uuid)
871
786
  default_data = {
872
787
  "patient_first_name": "Patient",
873
- "patient_last_name": "Unknown",
874
- "patient_dob": date(1990, 1, 1), # Default DOB
788
+ "patient_last_name": "Unknown",
789
+ "patient_dob": date(1990, 1, 1),
875
790
  "examination_date": date.today(),
876
- "center_name": video_file.center.name if video_file.center else "university_hospital_wuerzburg"
791
+ "center_name": video.center.name if video.center else "university_hospital_wuerzburg",
877
792
  }
878
-
879
793
  try:
880
794
  sensitive_meta = SensitiveMeta.create_from_dict(default_data)
881
- video_file.sensitive_meta = sensitive_meta
882
- video_file.save(update_fields=['sensitive_meta'])
883
-
884
- # Mark sensitive meta as processed after creating default data
885
- state = video_file.get_or_create_state()
795
+ video.sensitive_meta = sensitive_meta
796
+ video.save(update_fields=["sensitive_meta"])
797
+ state = video.get_or_create_state()
886
798
  state.mark_sensitive_meta_processed(save=True)
887
-
888
- self.logger.info(f"Created default SensitiveMeta for video {video_file.uuid}")
889
- except Exception as e:
890
- self.logger.error(f"Failed to create default SensitiveMeta for video {video_file.uuid}: {e}")
799
+ self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
800
+ except Exception as exc:
801
+ self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
891
802
  return
892
-
893
803
  else:
894
- # Update existing SensitiveMeta with missing fields
895
- update_needed = False
896
- update_data = {}
897
-
898
- if not video_file.sensitive_meta.patient_first_name:
804
+ update_data: Dict[str, Any] = {}
805
+ if not sensitive_meta.patient_first_name:
899
806
  update_data["patient_first_name"] = "Patient"
900
- update_needed = True
901
-
902
- if not video_file.sensitive_meta.patient_last_name:
807
+ if not sensitive_meta.patient_last_name:
903
808
  update_data["patient_last_name"] = "Unknown"
904
- update_needed = True
905
-
906
- if not video_file.sensitive_meta.patient_dob:
809
+ if not sensitive_meta.patient_dob:
907
810
  update_data["patient_dob"] = date(1990, 1, 1)
908
- update_needed = True
909
-
910
- if not video_file.sensitive_meta.examination_date:
811
+ if not sensitive_meta.examination_date:
911
812
  update_data["examination_date"] = date.today()
912
- update_needed = True
913
-
914
- if update_needed:
813
+
814
+ if update_data:
915
815
  try:
916
- video_file.sensitive_meta.update_from_dict(update_data)
917
-
918
- # Mark sensitive meta as processed after updating missing fields
919
- state = video_file.get_or_create_state()
816
+ sensitive_meta.update_from_dict(update_data)
817
+ state = video.get_or_create_state()
920
818
  state.mark_sensitive_meta_processed(save=True)
921
-
922
- self.logger.info(f"Updated missing SensitiveMeta fields for video {video_file.uuid}: {list(update_data.keys())}")
923
- except Exception as e:
924
- self.logger.error(f"Failed to update SensitiveMeta for video {video_file.uuid}: {e}")
819
+ self.logger.info(
820
+ "Updated missing SensitiveMeta fields for video %s: %s",
821
+ video.uuid,
822
+ list(update_data.keys()),
823
+ )
824
+ except Exception as exc:
825
+ self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
826
+
925
827
 
926
828
 
927
829
  def _ensure_frame_cleaning_available(self):
@@ -935,6 +837,9 @@ class VideoImportService():
935
837
  # Check if we can find the lx-anonymizer directory
936
838
  from importlib import resources
937
839
  lx_anonymizer_path = resources.files("lx_anonymizer")
840
+
841
+ # make sure lx_anonymizer_path is a Path object
842
+ lx_anonymizer_path = Path(str(lx_anonymizer_path))
938
843
 
939
844
  if lx_anonymizer_path.exists():
940
845
  # Add to Python path temporarily
@@ -960,41 +865,9 @@ class VideoImportService():
960
865
 
961
866
  return False, None, None
962
867
 
963
- def _get_processor_roi_info(self):
964
- """Get processor ROI information for masking."""
965
- processor_roi = None
966
- endoscope_roi = None
967
-
968
- try:
969
- if self.current_video.video_meta and self.current_video.video_meta.processor:
970
- processor = getattr(self.current_video.video_meta, "processor", None)
971
-
972
- # Get the endoscope ROI for masking
973
- endoscope_roi = processor.get_roi_endoscope_image()
974
-
975
- # Get all processor ROIs for comprehensive masking
976
- processor_roi = {
977
- 'endoscope_image': endoscope_roi,
978
- 'patient_first_name': processor.get_roi_patient_first_name(),
979
- 'patient_last_name': processor.get_roi_patient_last_name(),
980
- 'patient_dob': processor.get_roi_patient_dob(),
981
- 'examination_date': processor.get_roi_examination_date(),
982
- 'examination_time': processor.get_roi_examination_time(),
983
- 'endoscope_type': processor.get_roi_endoscope_type(),
984
- 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
985
- }
986
-
987
- self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
988
- else:
989
- self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
990
-
991
- except Exception as e:
992
- self.logger.error(f"Failed to retrieve processor ROI information: {e}")
993
- # Continue without ROI - don't fail the entire import process
994
-
995
- return processor_roi, endoscope_roi
868
+
996
869
 
997
- def _perform_frame_cleaning(self, FrameCleaner, processor_roi, endoscope_roi):
870
+ def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
998
871
  """Perform frame cleaning and anonymization."""
999
872
  # Instantiate frame cleaner
1000
873
  frame_cleaner = FrameCleaner()
@@ -1006,32 +879,35 @@ class VideoImportService():
1006
879
  raise RuntimeError(f"Raw video path not found: {raw_video_path}")
1007
880
 
1008
881
  # Get processor name safely
1009
- processor = getattr(self.current_video.video_meta, "processor", None) if self.current_video.video_meta else None
882
+ video = self._require_current_video()
883
+ video_meta = getattr(video, "video_meta", None)
884
+ processor = getattr(video_meta, "processor", None) if video_meta else None
1010
885
  device_name = processor.name if processor else self.processing_context['processor_name']
1011
-
1012
- tmp_dir = RAW_FRAME_DIR
1013
-
886
+
1014
887
  # Create temporary output path for cleaned video
1015
888
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
1016
889
  cleaned_filename = f"cleaned_{video_filename}"
1017
890
  cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
1018
891
 
892
+ processor_roi, endoscope_roi = self._get_processor_roi_info(video)
893
+
894
+ # Processor roi can be used later to OCR preknown regions.
895
+
1019
896
  # Clean video with ROI masking (heavy I/O operation)
1020
897
  actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
1021
- Path(raw_video_path),
1022
- self.current_video,
1023
- tmp_dir,
1024
- device_name,
1025
- endoscope_roi,
1026
- processor_roi,
1027
- cleaned_video_path
898
+ video_path=Path(raw_video_path),
899
+ video_file_obj=video,
900
+ endoscope_image_roi=endoscope_image_roi,
901
+ endoscope_data_roi_nested=endoscope_data_roi_nested,
902
+ output_path=cleaned_video_path,
903
+ technique="mask_overlay"
1028
904
  )
1029
905
 
1030
906
  # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
1031
907
  try:
1032
908
  # Prefer frames belonging to this video (UUID in path), else pick any frame
1033
909
  frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
1034
- video_uuid = str(self.current_video.uuid)
910
+ video_uuid = str(video.uuid)
1035
911
  filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
1036
912
  if filtered:
1037
913
  sample_frame = random.choice(filtered)
@@ -1062,88 +938,74 @@ class VideoImportService():
1062
938
  self.logger.info(f"Frame cleaning with ROI masking completed: {actual_cleaned_path}")
1063
939
  self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
1064
940
 
1065
- def _update_sensitive_metadata(self, extracted_metadata):
941
+ def _update_sensitive_metadata(self, extracted_metadata: Dict[str, Any]):
1066
942
  """
1067
943
  Update sensitive metadata with extracted information.
1068
-
1069
- SAFETY MECHANISM: Only updates fields that are empty, default values, or explicitly marked as safe to overwrite.
1070
- This prevents accidentally overwriting valuable manually entered or previously extracted data.
944
+ Args:
945
+ extracted_metadata (Dict[str, Any]): Extracted metadata to update.
1071
946
  """
1072
- if not (self.current_video.sensitive_meta and extracted_metadata):
947
+ video = self._require_current_video()
948
+ sensitive_meta = getattr(video, "sensitive_meta", None)
949
+
950
+ if not (sensitive_meta and extracted_metadata):
1073
951
  return
1074
-
1075
- sm = self.current_video.sensitive_meta
952
+
953
+ sm = sensitive_meta
1076
954
  updated_fields = []
1077
955
 
1078
- # Map extracted metadata to SensitiveMeta fields
1079
- metadata_mapping = {
1080
- 'patient_first_name': 'patient_first_name',
1081
- 'patient_last_name': 'patient_last_name',
1082
- 'patient_dob': 'patient_dob',
1083
- 'examination_date': 'examination_date',
1084
- 'endoscope_type': 'endoscope_type'
1085
- }
1086
-
1087
- # Define default/placeholder values that are safe to overwrite
1088
- SAFE_TO_OVERWRITE_VALUES = [
1089
- 'Vorname unbekannt', # Default first name
1090
- 'Nachname unbekannt', # Default last name
1091
- date(1990, 1, 1), # Default DOB
1092
- None, # Empty values
1093
- '', # Empty strings
1094
- 'N/A', # Placeholder values
1095
- 'Unbekanntes Gerät', # Default device name
1096
- ]
1097
-
1098
- for meta_key, sm_field in metadata_mapping.items():
1099
- if extracted_metadata.get(meta_key) and hasattr(sm, sm_field):
1100
- old_value = getattr(sm, sm_field)
1101
- new_value = extracted_metadata[meta_key]
1102
-
1103
- # Enhanced safety check: Only update if current value is safe to overwrite
1104
- if new_value and (old_value in SAFE_TO_OVERWRITE_VALUES):
1105
- self.logger.info(f"Updating {sm_field} from '{old_value}' to '{new_value}' for video {self.current_video.uuid}")
1106
- setattr(sm, sm_field, new_value)
1107
- updated_fields.append(sm_field)
1108
- elif new_value and old_value and old_value not in SAFE_TO_OVERWRITE_VALUES:
1109
- self.logger.info(f"Preserving existing {sm_field} value '{old_value}' (not overwriting with '{new_value}') for video {self.current_video.uuid}")
1110
-
956
+ try:
957
+ sm.update_from_dict(extracted_metadata)
958
+ updated_fields = list(extracted_metadata.keys())
959
+ except KeyError as e:
960
+ self.logger.warning(f"Failed to update SensitiveMeta field {e}")
961
+
1111
962
  if updated_fields:
1112
963
  sm.save(update_fields=updated_fields)
1113
- self.logger.info(f"Updated SensitiveMeta fields for video {self.current_video.uuid}: {updated_fields}")
1114
-
1115
- # Mark sensitive meta as processed after successful update
1116
- self.current_video.state.mark_sensitive_meta_processed(save=True)
1117
- self.logger.info(f"Marked sensitive metadata as processed for video {self.current_video.uuid}")
964
+ self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
965
+
966
+ state = video.get_or_create_state()
967
+ state.mark_sensitive_meta_processed(save=True)
968
+ self.logger.info("Marked sensitive metadata as processed for video %s", video.uuid)
1118
969
  else:
1119
- self.logger.info(f"No SensitiveMeta fields updated for video {self.current_video.uuid} - all existing values preserved")
970
+ self.logger.info("No SensitiveMeta fields updated for video %s - all existing values preserved", video.uuid)
1120
971
 
1121
972
  def _signal_completion(self):
1122
973
  """Signal completion to the tracking system."""
1123
974
  try:
975
+ video = self._require_current_video()
976
+
977
+ raw_field: FieldFile | None = getattr(video, "raw_file", None)
978
+ raw_exists = False
979
+ if raw_field and getattr(raw_field, "path", None):
980
+ try:
981
+ raw_exists = Path(raw_field.path).exists()
982
+ except (ValueError, OSError):
983
+ raw_exists = False
984
+
1124
985
  video_processing_complete = (
1125
- self.current_video.sensitive_meta is not None and
1126
- self.current_video.video_meta is not None and
1127
- self.current_video.raw_file and
1128
- hasattr(self.current_video.raw_file, 'path') and
1129
- Path(self.current_video.raw_file.path).exists()
986
+ video.sensitive_meta is not None and
987
+ video.video_meta is not None and
988
+ raw_exists
1130
989
  )
1131
-
990
+
1132
991
  if video_processing_complete:
1133
- self.logger.info(f"Video {self.current_video.uuid} processing completed successfully - ready for validation")
1134
-
992
+ self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
993
+
1135
994
  # Update completion flags if they exist
1136
995
  completion_fields = []
1137
996
  for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
1138
- if hasattr(self.current_video, field_name):
1139
- setattr(self.current_video, field_name, True)
997
+ if hasattr(video, field_name):
998
+ setattr(video, field_name, True)
1140
999
  completion_fields.append(field_name)
1141
1000
 
1142
1001
  if completion_fields:
1143
- self.current_video.save(update_fields=completion_fields)
1144
- self.logger.info(f"Updated completion flags: {completion_fields}")
1002
+ video.save(update_fields=completion_fields)
1003
+ self.logger.info("Updated completion flags: %s", completion_fields)
1145
1004
  else:
1146
- self.logger.warning(f"Video {self.current_video.uuid} processing incomplete - missing required components")
1005
+ self.logger.warning(
1006
+ "Video %s processing incomplete - missing required components",
1007
+ video.uuid,
1008
+ )
1147
1009
 
1148
1010
  except Exception as e:
1149
1011
  self.logger.warning(f"Failed to signal completion status: {e}")
@@ -1186,6 +1048,9 @@ class VideoImportService():
1186
1048
  self.processed_files.remove(file_path_str)
1187
1049
  self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
1188
1050
 
1051
+
1052
+
1053
+
1189
1054
  except Exception as e:
1190
1055
  self.logger.warning(f"Error during context cleanup: {e}")
1191
1056
  finally:
@@ -1200,7 +1065,7 @@ def import_and_anonymize(
1200
1065
  processor_name: str,
1201
1066
  save_video: bool = True,
1202
1067
  delete_source: bool = False,
1203
- ) -> "VideoFile":
1068
+ ) -> VideoFile | None:
1204
1069
  """Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
1205
1070
  Kept for backward compatibility with callers that import this function directly.
1206
1071
  """