endoreg-db 0.8.2.1__py3-none-any.whl → 0.8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

@@ -18,13 +18,18 @@ from contextlib import contextmanager
18
18
  from pathlib import Path
19
19
  from typing import Union, Dict, Any, Optional
20
20
  from django.db import transaction
21
+ from transformers.models.align.convert_align_tf_to_hf import get_processor
21
22
  from endoreg_db.models import VideoFile, SensitiveMeta
22
23
  from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
23
24
  import random
24
25
  from lx_anonymizer.ocr import trocr_full_image_ocr
25
26
  from endoreg_db.utils.hashs import get_video_hash
26
- from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
27
+ from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
28
+ from typing import TYPE_CHECKING
29
+ from django.db.models.fields.files import FieldFile
27
30
 
31
+ if TYPE_CHECKING:
32
+ from endoreg_db.models import EndoscopyProcessor
28
33
 
29
34
  # File lock configuration (matches PDF import)
30
35
  STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
@@ -59,12 +64,18 @@ class VideoImportService():
59
64
  self.STORAGE_DIR = STORAGE_DIR
60
65
 
61
66
  # Central video instance and processing context
62
- self.current_video = None
67
+ self.current_video: Optional[VideoFile] = None
63
68
  self.processing_context: Dict[str, Any] = {}
64
69
 
65
70
  self.delete_source = False
66
71
 
67
72
  self.logger = logging.getLogger(__name__)
73
+
74
+ def _require_current_video(self) -> VideoFile:
75
+ """Return the current VideoFile or raise if it has not been initialized."""
76
+ if self.current_video is None:
77
+ raise RuntimeError("Current video instance is not set")
78
+ return self.current_video
68
79
 
69
80
  @contextmanager
70
81
  def _file_lock(self, path: Path):
@@ -156,6 +167,9 @@ class VideoImportService():
156
167
  return None
157
168
  raise
158
169
 
170
+ # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
171
+ self._create_sensitive_file()
172
+
159
173
  # Create or retrieve video instance
160
174
  self._create_or_retrieve_video_instance()
161
175
 
@@ -355,27 +369,25 @@ class VideoImportService():
355
369
 
356
370
  def _setup_processing_environment(self):
357
371
  """Setup the processing environment without file movement."""
358
- # Ensure we have a valid video instance
359
- if not self.current_video:
360
- raise RuntimeError("No video instance available for processing environment setup")
361
-
372
+ video = self._require_current_video()
373
+
362
374
  # Initialize video specifications
363
- self.current_video.initialize_video_specs()
364
-
375
+ video.initialize_video_specs()
376
+
365
377
  # Initialize frame objects in database
366
- self.current_video.initialize_frames()
378
+ video.initialize_frames()
367
379
 
368
380
  # Extract frames BEFORE processing to prevent pipeline 1 conflicts
369
381
  self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
370
382
  try:
371
- frames_extracted = self.current_video.extract_frames(overwrite=False)
383
+ frames_extracted = video.extract_frames(overwrite=False)
372
384
  if frames_extracted:
373
385
  self.processing_context['frames_extracted'] = True
374
386
  self.logger.info("Frame extraction completed successfully")
375
387
 
376
388
  # CRITICAL: Immediately save the frames_extracted state to database
377
389
  # to prevent refresh_from_db() in pipeline 1 from overriding it
378
- state = self.current_video.get_or_create_state()
390
+ state = video.get_or_create_state()
379
391
  if not state.frames_extracted:
380
392
  state.frames_extracted = True
381
393
  state.save(update_fields=['frames_extracted'])
@@ -388,7 +400,7 @@ class VideoImportService():
388
400
  self.processing_context['frames_extracted'] = False
389
401
 
390
402
  # Ensure default patient data
391
- self._ensure_default_patient_data()
403
+ self._ensure_default_patient_data(video_instance=video)
392
404
 
393
405
  self.logger.info("Processing environment setup completed")
394
406
 
@@ -396,11 +408,12 @@ class VideoImportService():
396
408
  """Process frames and extract metadata with anonymization."""
397
409
  # Check frame cleaning availability
398
410
  frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
399
-
400
- _current_video = self.current_video
401
- assert _current_video is not None, "Current video instance is None during frame processing"
411
+ video = self._require_current_video()
412
+
413
+ raw_file_field = video.raw_file
414
+ has_raw_file = isinstance(raw_file_field, FieldFile) and bool(raw_file_field.name)
402
415
 
403
- if not (frame_cleaning_available and _current_video.raw_file):
416
+ if not (frame_cleaning_available and has_raw_file):
404
417
  self.logger.warning("Frame cleaning not available or conditions not met, using fallback anonymization.")
405
418
  self._fallback_anonymize_video()
406
419
  return
@@ -427,14 +440,20 @@ class VideoImportService():
427
440
  raw_video_path = self.processing_context.get('raw_video_path')
428
441
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
429
442
  grace_seconds = 60
430
- expected_cleaned = self.current_video.processed_file
443
+ expected_cleaned_path: Optional[Path] = None
444
+ processed_field = video.processed_file
445
+ if isinstance(processed_field, FieldFile) and processed_field.name:
446
+ try:
447
+ expected_cleaned_path = Path(processed_field.path)
448
+ except (NotImplementedError, TypeError, ValueError):
449
+ expected_cleaned_path = None
431
450
  found = False
432
- if expected_cleaned is not None:
451
+ if expected_cleaned_path is not None:
433
452
  for _ in range(grace_seconds):
434
- if expected_cleaned.exists():
435
- self.processing_context['cleaned_video_path'] = expected_cleaned
453
+ if expected_cleaned_path.exists():
454
+ self.processing_context['cleaned_video_path'] = expected_cleaned_path
436
455
  self.processing_context['anonymization_completed'] = True
437
- self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned)
456
+ self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
438
457
  found = True
439
458
  break
440
459
  time.sleep(1)
@@ -442,7 +461,7 @@ class VideoImportService():
442
461
  self._fallback_anonymize_video()
443
462
  if not found:
444
463
  raise TimeoutError("Frame cleaning operation timed out - likely Ollama connection issue")
445
-
464
+
446
465
  except Exception as e:
447
466
  self.logger.warning("Frame cleaning failed (reason: %s), falling back to simple copy", e)
448
467
  # Try fallback anonymization when frame cleaning fails
@@ -455,17 +474,20 @@ class VideoImportService():
455
474
  self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
456
475
 
457
476
  def _save_anonymized_video(self):
458
- anonymized_video_path = self.current_video.get_target_anonymized_video_path()
459
-
477
+ video = self._require_current_video()
478
+ anonymized_video_path = video.get_target_anonymized_video_path()
479
+
460
480
  if not anonymized_video_path.exists():
461
- raise RuntimeError(f"Processed video file not found after assembly for {self.current_video.uuid}: {anonymized_video_path}")
481
+ raise RuntimeError(f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}")
462
482
 
463
483
  new_processed_hash = get_video_hash(anonymized_video_path)
464
- if type(self.current_video).objects.filter(processed_video_hash=new_processed_hash).exclude(pk=self.current_video.pk).exists():
465
- raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {self.current_video.uuid}).")
484
+ if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
485
+ raise ValueError(
486
+ f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
487
+ )
466
488
 
467
- self.current_video.processed_video_hash = new_processed_hash
468
- self.current_video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
489
+ video.processed_video_hash = new_processed_hash
490
+ video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
469
491
 
470
492
  update_fields = [
471
493
  "processed_video_hash",
@@ -474,22 +496,23 @@ class VideoImportService():
474
496
  ]
475
497
 
476
498
  if self.delete_source:
477
- original_raw_file_path_to_delete = self.current_video.get_raw_file_path()
478
- original_raw_frame_dir_to_delete = self.current_video.get_frame_dir_path()
499
+ original_raw_file_path_to_delete = video.get_raw_file_path()
500
+ original_raw_frame_dir_to_delete = video.get_frame_dir_path()
479
501
 
480
- self.current_video.raw_file.name = None
502
+ video.raw_file.name = None # type: ignore[assignment]
481
503
 
482
504
  update_fields.extend(["raw_file", "video_hash"])
483
505
 
484
506
  transaction.on_commit(lambda: _cleanup_raw_assets(
485
- video_uuid=self.current_video.uuid,
507
+ video_uuid=video.uuid,
486
508
  raw_file_path=original_raw_file_path_to_delete,
487
509
  raw_frame_dir=original_raw_frame_dir_to_delete
488
510
  ))
489
511
 
490
- self.current_video.save(update_fields=update_fields)
491
- self.current_video.state.mark_anonymized(save=True)
492
- self.current_video.refresh_from_db()
512
+ video.save(update_fields=update_fields)
513
+ video.state.mark_anonymized(save=True)
514
+ video.refresh_from_db()
515
+ self.current_video = video
493
516
  return True
494
517
 
495
518
  def _fallback_anonymize_video(self):
@@ -498,23 +521,23 @@ class VideoImportService():
498
521
  """
499
522
  try:
500
523
  self.logger.info("Attempting fallback video anonymization...")
501
- if self.current_video:
524
+ video = self.current_video
525
+ if video is None:
526
+ self.logger.warning("No VideoFile instance available for fallback anonymization")
527
+ else:
502
528
  # Try VideoFile.pipe_2() method if available
503
- if hasattr(self.current_video, 'pipe_2'):
529
+ if hasattr(video, 'pipe_2'):
504
530
  self.logger.info("Trying VideoFile.pipe_2() method...")
505
- if self.current_video.pipe_2():
531
+ if video.pipe_2():
506
532
  self.logger.info("VideoFile.pipe_2() succeeded")
507
533
  self.processing_context['anonymization_completed'] = True
508
534
  return
509
- else:
510
- self.logger.warning("VideoFile.pipe_2() returned False")
535
+ self.logger.warning("VideoFile.pipe_2() returned False")
511
536
  # Try direct anonymization via _anonymize
512
- if _anonymize(self.current_video, delete_original_raw=self.delete_source):
537
+ if _anonymize(video, delete_original_raw=self.delete_source):
513
538
  self.logger.info("VideoFile._anonymize() succeeded")
514
539
  self.processing_context['anonymization_completed'] = True
515
540
  return
516
- else:
517
- self.logger.warning("No VideoFile instance available for fallback anonymization")
518
541
 
519
542
  # Strategy 2: Simple copy (no processing, just copy raw to processed)
520
543
  self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
@@ -524,33 +547,19 @@ class VideoImportService():
524
547
  except Exception as e:
525
548
  self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
526
549
  self.processing_context['anonymization_completed'] = False
527
- self.processing_context['error_reason']
550
+ self.processing_context['error_reason'] = str(e)
528
551
  def _finalize_processing(self):
529
552
  """Finalize processing and update video state."""
530
553
  self.logger.info("Updating video processing state...")
531
554
 
532
555
  with transaction.atomic():
533
- # Update basic processing states
534
- # Ensure state exists before accessing it
556
+ video = self._require_current_video()
557
+ try:
558
+ video.refresh_from_db()
559
+ except Exception as refresh_error:
560
+ self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
535
561
 
536
- if not self.current_video:
537
- try:
538
- self.current_video.refresh_from_db()
539
- except Exception as e:
540
- self.logger.error(f"Failed to refresh current_video from DB: {e}")
541
- if not self.current_video:
542
- raise RuntimeError("No current video instance available for finalization")
543
-
544
- if not self.current_video.processed_file:
545
- self.logger.warning("No processed file available for current video")
546
- self.current_video.processed_file = None # Ensure field is not None
547
- self.current_video.mark_sensitive_meta_processed = False
548
- else:
549
- self.current_video.mark_sensitive_meta_processed = True
550
-
551
- state = self.current_video.get_or_create_state()
552
- if not state:
553
- raise RuntimeError("Failed to get or create video state")
562
+ state = video.get_or_create_state()
554
563
 
555
564
  # Only mark frames as extracted if they were successfully extracted
556
565
  if self.processing_context.get('frames_extracted', False):
@@ -579,10 +588,7 @@ class VideoImportService():
579
588
 
580
589
  # Save all state changes
581
590
  state.save()
582
- self.logger.info("Video processing state updated")
583
- # Save all state changes
584
- self.current_video.state.save()
585
- self.current_video.save()
591
+ self.logger.info("Video processing state updated")
586
592
 
587
593
  # Signal completion
588
594
  self._signal_completion()
@@ -590,59 +596,48 @@ class VideoImportService():
590
596
  def _cleanup_and_archive(self):
591
597
  """Move processed video to anonym_videos and cleanup."""
592
598
  from endoreg_db.utils import data_paths
593
-
594
- # Define target directory for processed videos
599
+
595
600
  anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
596
601
  anonym_videos_dir.mkdir(parents=True, exist_ok=True)
597
-
598
- # Check if we have a processed/cleaned video
602
+
603
+ video = self._require_current_video()
604
+
599
605
  processed_video_path = None
600
-
601
- # Look for cleaned video from frame cleaning process
602
606
  if 'cleaned_video_path' in self.processing_context:
603
607
  processed_video_path = self.processing_context['cleaned_video_path']
604
608
  else:
605
- # If no processing occurred, copy from raw video location
606
609
  raw_video_path = self.processing_context.get('raw_video_path')
607
610
  if raw_video_path and Path(raw_video_path).exists():
608
611
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
609
612
  processed_filename = f"processed_{video_filename}"
610
613
  processed_video_path = Path(raw_video_path).parent / processed_filename
611
-
612
- # Copy raw to processed location (will be moved to anonym_videos)
613
614
  try:
614
615
  shutil.copy2(str(raw_video_path), str(processed_video_path))
615
616
  self.logger.info("Copied raw video for processing: %s", processed_video_path)
616
- except Exception as e:
617
- self.logger.error("Failed to copy raw video: %s", e)
618
- processed_video_path = None # FIXED: Don't use raw as fallback
619
-
620
- # Move processed video to anonym_videos ONLY if it exists
617
+ except Exception as exc:
618
+ self.logger.error("Failed to copy raw video: %s", exc)
619
+ processed_video_path = None
620
+
621
621
  if processed_video_path and Path(processed_video_path).exists():
622
622
  try:
623
- # ✅ Clean filename: no original filename leakage
624
623
  ext = Path(processed_video_path).suffix or ".mp4"
625
- anonym_video_filename = f"anonym_{self.current_video.uuid}{ext}"
624
+ anonym_video_filename = f"anonym_{video.uuid}{ext}"
626
625
  anonym_target_path = anonym_videos_dir / anonym_video_filename
627
626
 
628
- # Move processed video to anonym_videos/
629
627
  shutil.move(str(processed_video_path), str(anonym_target_path))
630
628
  self.logger.info("Moved processed video to: %s", anonym_target_path)
631
629
 
632
- # Verify the file actually exists before updating database
633
630
  if anonym_target_path.exists():
634
631
  try:
635
632
  storage_root = data_paths["storage"]
636
633
  relative_path = anonym_target_path.relative_to(storage_root)
637
- # Save relative path (e.g. anonym_videos/anonym_<uuid>.mp4)
638
- self.current_video.processed_file.name = str(relative_path)
639
- self.current_video.save(update_fields=["processed_file"])
634
+ video.processed_file.name = str(relative_path)
635
+ video.save(update_fields=["processed_file"])
640
636
  self.logger.info("Updated processed_file path to: %s", relative_path)
641
- except Exception as e:
642
- self.logger.error("Failed to update processed_file path: %s", e)
643
- # Fallback to simple relative path
644
- self.current_video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
645
- self.current_video.save(update_fields=['processed_file'])
637
+ except Exception as exc:
638
+ self.logger.error("Failed to update processed_file path: %s", exc)
639
+ video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
640
+ video.save(update_fields=['processed_file'])
646
641
  self.logger.info(
647
642
  "Updated processed_file path using fallback: %s",
648
643
  f"anonym_videos/{anonym_video_filename}",
@@ -651,277 +646,194 @@ class VideoImportService():
651
646
  self.processing_context['anonymization_completed'] = True
652
647
  else:
653
648
  self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
654
- except Exception as e:
655
- self.logger.error("Failed to move processed video to anonym_videos: %s", e)
649
+ except Exception as exc:
650
+ self.logger.error("Failed to move processed video to anonym_videos: %s", exc)
656
651
  else:
657
652
  self.logger.warning("No processed video available - processed_file will remain empty")
658
- # Leave processed_file empty/null - frontend should fall back to raw_file
659
-
660
- # Cleanup temporary directories
653
+
661
654
  try:
662
655
  from endoreg_db.utils.paths import RAW_FRAME_DIR
663
656
  shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
664
657
  self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
665
- except Exception as e:
666
- self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, e)
667
-
668
- # Handle source file deletion - this should already be moved, but check raw_videos
658
+ except Exception as exc:
659
+ self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
660
+
669
661
  source_path = self.processing_context['file_path']
670
662
  if self.processing_context['delete_source'] and Path(source_path).exists():
671
663
  try:
672
664
  os.remove(source_path)
673
665
  self.logger.info("Removed remaining source file: %s", source_path)
674
- except Exception as e:
675
- self.logger.warning("Failed to remove source file %s: %s", source_path, e)
676
-
677
- # Check if processed video exists and otherwise call anonymize
678
-
679
- if not self.current_video.processed_file or not Path(self.current_video.processed_file.path).exists():
666
+ except Exception as exc:
667
+ self.logger.warning("Failed to remove source file %s: %s", source_path, exc)
668
+
669
+ if not video.processed_file or not Path(video.processed_file.path).exists():
680
670
  self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
681
- self.current_video.anonymize(delete_original_raw=self.delete_source)
682
- self.current_video.save(update_fields=['processed_file'])
683
-
684
-
671
+ try:
672
+ video.anonymize(delete_original_raw=self.delete_source)
673
+ video.save(update_fields=['processed_file'])
674
+ self.logger.info("Late-stage anonymization succeeded")
675
+ except Exception as e:
676
+ self.logger.error("Late-stage anonymization failed: %s", e)
677
+ self.processing_context['anonymization_completed'] = False
678
+
685
679
  self.logger.info("Cleanup and archiving completed")
686
-
687
-
688
-
689
- # Mark as processed (in-memory tracking)
680
+
690
681
  self.processed_files.add(str(self.processing_context['file_path']))
691
-
692
- # Refresh from database and finalize state
682
+
693
683
  with transaction.atomic():
694
- self.current_video.refresh_from_db()
695
- if hasattr(self.current_video, 'state') and self.processing_context.get('anonymization_completed'):
696
- self.current_video.state.mark_sensitive_meta_processed(save=True)
697
-
698
-
699
- self.logger.info("Import and anonymization completed for VideoFile UUID: %s", self.current_video.uuid)
684
+ video.refresh_from_db()
685
+ if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
686
+ video.state.mark_sensitive_meta_processed(save=True)
687
+
688
+ self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
700
689
  self.logger.info("Raw video stored in: /data/videos")
701
690
  self.logger.info("Processed video stored in: /data/anonym_videos")
702
691
 
703
- def _create_sensitive_file(self, video_instance: "VideoFile" = None, file_path: Union[Path, str] = None) -> Path:
704
- """
705
- Create a sensitive file for the given video file by copying the original file and updating the path.
706
- Uses the central video instance and processing context if parameters not provided.
692
+ def _create_sensitive_file(
693
+ self,
694
+ video_instance: VideoFile | None = None,
695
+ file_path: Path | str | None = None,
696
+ ) -> Path:
697
+ """Create or move a sensitive copy of the raw video file inside storage."""
707
698
 
708
- Args:
709
- video_instance: Optional video instance, defaults to self.current_video
710
- file_path: Optional file path, defaults to processing_context['file_path']
699
+ video = video_instance or self._require_current_video()
711
700
 
712
- Returns:
713
- Path: The path to the created sensitive file.
714
- """
715
- video_file = video_instance or self.current_video
716
- # Always use the currently stored raw file path from the model to avoid deleting external source assets
717
- source_path = None
701
+ raw_field: FieldFile | None = getattr(video, "raw_file", None)
702
+ source_path: Path | None = None
718
703
  try:
719
- if video_file and hasattr(video_file, 'raw_file') and video_file.raw_file and hasattr(video_file.raw_file, 'path'):
720
- source_path = Path(video_file.raw_file.path)
704
+ if raw_field and raw_field.path:
705
+ source_path = Path(raw_field.path)
721
706
  except Exception:
722
707
  source_path = None
723
- # Fallback only if explicitly provided (do NOT default to processing_context input file)
708
+
724
709
  if source_path is None and file_path is not None:
725
710
  source_path = Path(file_path)
726
-
727
- if not video_file:
728
- raise ValueError("No video instance available for creating sensitive file")
729
- if not source_path:
711
+
712
+ if source_path is None:
730
713
  raise ValueError("No file path available for creating sensitive file")
731
-
732
- if not video_file.raw_file:
714
+ if not raw_field:
733
715
  raise ValueError("VideoFile must have a raw_file to create a sensitive file")
734
-
735
- # Ensure the target directory exists
736
- target_dir = VIDEO_DIR / 'sensitive'
716
+
717
+ target_dir = VIDEO_DIR / "sensitive"
737
718
  if not target_dir.exists():
738
- self.logger.info(f"Creating sensitive file directory: {target_dir}")
719
+ self.logger.info("Creating sensitive file directory: %s", target_dir)
739
720
  os.makedirs(target_dir, exist_ok=True)
740
-
741
- # Move the stored raw file into the sensitive directory within storage
721
+
742
722
  target_file_path = target_dir / source_path.name
743
723
  try:
744
- # Prefer a move within the storage to avoid extra disk usage. This does not touch external input files.
745
724
  shutil.move(str(source_path), str(target_file_path))
746
- self.logger.info(f"Moved raw file to sensitive directory: {target_file_path}")
747
- except Exception as e:
748
- # Fallback to copy if move fails (e.g., cross-device or permissions), then remove only the original stored raw file
749
- self.logger.warning(f"Failed to move raw file to sensitive dir, copying instead: {e}")
725
+ self.logger.info("Moved raw file to sensitive directory: %s", target_file_path)
726
+ except Exception as exc:
727
+ self.logger.warning("Failed to move raw file to sensitive dir, copying instead: %s", exc)
750
728
  shutil.copy(str(source_path), str(target_file_path))
751
729
  try:
752
- # Remove only the stored raw file copy; never touch external input paths here
753
730
  os.remove(source_path)
754
731
  except FileNotFoundError:
755
732
  pass
756
-
757
- # Update the model to point to the sensitive file location
758
- # Use relative path from storage root, like in create_from_file.py
733
+
759
734
  try:
760
735
  from endoreg_db.utils import data_paths
736
+
761
737
  storage_root = data_paths["storage"]
762
738
  relative_path = target_file_path.relative_to(storage_root)
763
- video_file.raw_file.name = str(relative_path)
764
- video_file.save(update_fields=['raw_file'])
765
- self.logger.info(f"Updated video.raw_file to point to sensitive location: {relative_path}")
766
- except Exception as e:
767
- # Fallback to absolute path conversion if relative path fails
768
- self.logger.warning(f"Failed to set relative path, using fallback: {e}")
769
- video_file.raw_file.name = f"videos/sensitive/{target_file_path.name}"
770
- video_file.save(update_fields=['raw_file'])
771
- self.logger.info(f"Updated video.raw_file using fallback method: videos/sensitive/{target_file_path.name}")
772
-
773
- # Important: Do NOT remove the original input asset passed to the service here.
774
- # Source file cleanup for external inputs is handled by create_from_file via delete_source flag.
775
-
776
- self.logger.info(f"Created sensitive file for {video_file.uuid} at {target_file_path}")
777
- return target_file_path
778
-
779
-
780
-
739
+ video.raw_file.name = str(relative_path)
740
+ video.save(update_fields=["raw_file"])
741
+ self.logger.info("Updated video.raw_file to point to sensitive location: %s", relative_path)
742
+ except Exception as exc:
743
+ self.logger.warning("Failed to set relative path, using fallback: %s", exc)
744
+ video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
745
+ video.save(update_fields=["raw_file"])
746
+ self.logger.info(
747
+ "Updated video.raw_file using fallback method: videos/sensitive/%s",
748
+ target_file_path.name,
749
+ )
781
750
 
782
- def _ensure_frame_cleaning_available(self):
783
- """
784
- Ensure frame cleaning modules are available by adding lx-anonymizer to path.
785
-
786
- Returns:
787
- Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
788
- """
789
- try:
790
- # Check if we can find the lx-anonymizer directory
791
- from importlib import resources
792
- lx_anonymizer_path = resources.files("lx_anonymizer")
793
-
794
- if lx_anonymizer_path.exists():
795
- # Add to Python path temporarily
796
- if str(lx_anonymizer_path) not in sys.path:
797
- sys.path.insert(0, str(lx_anonymizer_path))
798
-
799
- # Try simple import
800
- from lx_anonymizer import FrameCleaner, ReportReader
801
-
802
- self.logger.info("Successfully imported lx_anonymizer modules")
803
-
804
- # Remove from path to avoid conflicts
805
- if str(lx_anonymizer_path) in sys.path:
806
- sys.path.remove(str(lx_anonymizer_path))
807
-
808
- return True, FrameCleaner, ReportReader
809
-
810
- else:
811
- self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
812
-
813
- except Exception as e:
814
- self.logger.warning(f"Frame cleaning not available: {e}")
815
-
816
- return False, None, None
751
+ self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
752
+ return target_file_path
817
753
 
818
754
  def _get_processor_roi_info(self):
819
755
  """Get processor ROI information for masking."""
820
756
  processor_roi = None
821
757
  endoscope_roi = None
822
-
758
+
759
+ video = self._require_current_video()
760
+
823
761
  try:
824
- if self.current_video.video_meta and self.current_video.video_meta.processor:
825
- processor = getattr(self.current_video.video_meta, "processor", None)
826
-
827
- # Get the endoscope ROI for masking
762
+ video_meta = getattr(video, "video_meta", None)
763
+ processor = getattr(video_meta, "processor", None) if video_meta else None
764
+ if processor:
765
+ assert isinstance(processor, EndoscopyProcessor), "Processor is not of type EndoscopyProcessor"
828
766
  endoscope_roi = processor.get_roi_endoscope_image()
829
-
830
- # Get all processor ROIs for comprehensive masking
831
767
  processor_roi = {
832
- 'endoscope_image': endoscope_roi,
833
- 'patient_first_name': processor.get_roi_patient_first_name(),
834
- 'patient_last_name': processor.get_roi_patient_last_name(),
835
- 'patient_dob': processor.get_roi_patient_dob(),
836
- 'examination_date': processor.get_roi_examination_date(),
837
- 'examination_time': processor.get_roi_examination_time(),
838
- 'endoscope_type': processor.get_roi_endoscope_type(),
839
- 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
768
+ "endoscope_image": endoscope_roi,
769
+ "patient_first_name": processor.get_roi_patient_first_name(),
770
+ "patient_last_name": processor.get_roi_patient_last_name(),
771
+ "patient_dob": processor.get_roi_patient_dob(),
772
+ "examination_date": processor.get_roi_examination_date(),
773
+ "examination_time": processor.get_roi_examination_time(),
774
+ "endoscope_type": processor.get_roi_endoscope_type(),
775
+ "endoscopy_sn": processor.get_roi_endoscopy_sn(),
840
776
  }
841
-
842
- self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
777
+ self.logger.info("Retrieved processor ROI information: endoscope_roi=%s", endoscope_roi)
843
778
  else:
844
- self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
845
-
846
- except Exception as e:
847
- self.logger.error(f"Failed to retrieve processor ROI information: {e}")
848
- # Continue without ROI - don't fail the entire import process
849
-
779
+ self.logger.warning(
780
+ "No processor found for video %s, proceeding without ROI masking",
781
+ video.uuid,
782
+ )
783
+ except Exception as exc:
784
+ self.logger.error("Failed to retrieve processor ROI information: %s", exc)
785
+
850
786
  return processor_roi, endoscope_roi
851
787
 
788
+ def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
789
+ """Ensure minimum patient data is present on the video's SensitiveMeta."""
852
790
 
853
- def _ensure_default_patient_data(self, video_instance: "VideoFile" = None) -> None:
854
- """
855
- Ensure video has minimum required patient data in SensitiveMeta.
856
- Creates default values if data is missing after OCR processing.
857
- Uses the central video instance if parameter not provided.
858
-
859
- Args:
860
- video_instance: Optional video instance, defaults to self.current_video
861
- """
862
- video_file = video_instance or self.current_video
863
-
864
- if not video_file:
865
- raise ValueError("No video instance available for ensuring patient data")
866
-
867
- if not video_file.sensitive_meta:
868
- self.logger.info(f"No SensitiveMeta found for video {video_file.uuid}, creating default")
869
-
870
- # Create default SensitiveMeta with placeholder data
791
+ video = video_instance or self._require_current_video()
792
+
793
+ sensitive_meta = getattr(video, "sensitive_meta", None)
794
+ if not sensitive_meta:
795
+ self.logger.info("No SensitiveMeta found for video %s, creating default", video.uuid)
871
796
  default_data = {
872
797
  "patient_first_name": "Patient",
873
- "patient_last_name": "Unknown",
874
- "patient_dob": date(1990, 1, 1), # Default DOB
798
+ "patient_last_name": "Unknown",
799
+ "patient_dob": date(1990, 1, 1),
875
800
  "examination_date": date.today(),
876
- "center_name": video_file.center.name if video_file.center else "university_hospital_wuerzburg"
801
+ "center_name": video.center.name if video.center else "university_hospital_wuerzburg",
877
802
  }
878
-
879
803
  try:
880
804
  sensitive_meta = SensitiveMeta.create_from_dict(default_data)
881
- video_file.sensitive_meta = sensitive_meta
882
- video_file.save(update_fields=['sensitive_meta'])
883
-
884
- # Mark sensitive meta as processed after creating default data
885
- state = video_file.get_or_create_state()
805
+ video.sensitive_meta = sensitive_meta
806
+ video.save(update_fields=["sensitive_meta"])
807
+ state = video.get_or_create_state()
886
808
  state.mark_sensitive_meta_processed(save=True)
887
-
888
- self.logger.info(f"Created default SensitiveMeta for video {video_file.uuid}")
889
- except Exception as e:
890
- self.logger.error(f"Failed to create default SensitiveMeta for video {video_file.uuid}: {e}")
809
+ self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
810
+ except Exception as exc:
811
+ self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
891
812
  return
892
-
893
813
  else:
894
- # Update existing SensitiveMeta with missing fields
895
- update_needed = False
896
- update_data = {}
897
-
898
- if not video_file.sensitive_meta.patient_first_name:
814
+ update_data: Dict[str, Any] = {}
815
+ if not sensitive_meta.patient_first_name:
899
816
  update_data["patient_first_name"] = "Patient"
900
- update_needed = True
901
-
902
- if not video_file.sensitive_meta.patient_last_name:
817
+ if not sensitive_meta.patient_last_name:
903
818
  update_data["patient_last_name"] = "Unknown"
904
- update_needed = True
905
-
906
- if not video_file.sensitive_meta.patient_dob:
819
+ if not sensitive_meta.patient_dob:
907
820
  update_data["patient_dob"] = date(1990, 1, 1)
908
- update_needed = True
909
-
910
- if not video_file.sensitive_meta.examination_date:
821
+ if not sensitive_meta.examination_date:
911
822
  update_data["examination_date"] = date.today()
912
- update_needed = True
913
-
914
- if update_needed:
823
+
824
+ if update_data:
915
825
  try:
916
- video_file.sensitive_meta.update_from_dict(update_data)
917
-
918
- # Mark sensitive meta as processed after updating missing fields
919
- state = video_file.get_or_create_state()
826
+ sensitive_meta.update_from_dict(update_data)
827
+ state = video.get_or_create_state()
920
828
  state.mark_sensitive_meta_processed(save=True)
921
-
922
- self.logger.info(f"Updated missing SensitiveMeta fields for video {video_file.uuid}: {list(update_data.keys())}")
923
- except Exception as e:
924
- self.logger.error(f"Failed to update SensitiveMeta for video {video_file.uuid}: {e}")
829
+ self.logger.info(
830
+ "Updated missing SensitiveMeta fields for video %s: %s",
831
+ video.uuid,
832
+ list(update_data.keys()),
833
+ )
834
+ except Exception as exc:
835
+ self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
836
+
925
837
 
926
838
 
927
839
  def _ensure_frame_cleaning_available(self):
@@ -935,6 +847,9 @@ class VideoImportService():
935
847
  # Check if we can find the lx-anonymizer directory
936
848
  from importlib import resources
937
849
  lx_anonymizer_path = resources.files("lx_anonymizer")
850
+
851
+ # make sure lx_anonymizer_path is a Path object
852
+ lx_anonymizer_path = Path(str(lx_anonymizer_path))
938
853
 
939
854
  if lx_anonymizer_path.exists():
940
855
  # Add to Python path temporarily
@@ -960,39 +875,7 @@ class VideoImportService():
960
875
 
961
876
  return False, None, None
962
877
 
963
- def _get_processor_roi_info(self):
964
- """Get processor ROI information for masking."""
965
- processor_roi = None
966
- endoscope_roi = None
967
-
968
- try:
969
- if self.current_video.video_meta and self.current_video.video_meta.processor:
970
- processor = getattr(self.current_video.video_meta, "processor", None)
971
-
972
- # Get the endoscope ROI for masking
973
- endoscope_roi = processor.get_roi_endoscope_image()
974
-
975
- # Get all processor ROIs for comprehensive masking
976
- processor_roi = {
977
- 'endoscope_image': endoscope_roi,
978
- 'patient_first_name': processor.get_roi_patient_first_name(),
979
- 'patient_last_name': processor.get_roi_patient_last_name(),
980
- 'patient_dob': processor.get_roi_patient_dob(),
981
- 'examination_date': processor.get_roi_examination_date(),
982
- 'examination_time': processor.get_roi_examination_time(),
983
- 'endoscope_type': processor.get_roi_endoscope_type(),
984
- 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
985
- }
986
-
987
- self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
988
- else:
989
- self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
990
-
991
- except Exception as e:
992
- self.logger.error(f"Failed to retrieve processor ROI information: {e}")
993
- # Continue without ROI - don't fail the entire import process
994
-
995
- return processor_roi, endoscope_roi
878
+
996
879
 
997
880
  def _perform_frame_cleaning(self, FrameCleaner, processor_roi, endoscope_roi):
998
881
  """Perform frame cleaning and anonymization."""
@@ -1006,7 +889,9 @@ class VideoImportService():
1006
889
  raise RuntimeError(f"Raw video path not found: {raw_video_path}")
1007
890
 
1008
891
  # Get processor name safely
1009
- processor = getattr(self.current_video.video_meta, "processor", None) if self.current_video.video_meta else None
892
+ video = self._require_current_video()
893
+ video_meta = getattr(video, "video_meta", None)
894
+ processor = getattr(video_meta, "processor", None) if video_meta else None
1010
895
  device_name = processor.name if processor else self.processing_context['processor_name']
1011
896
 
1012
897
  tmp_dir = RAW_FRAME_DIR
@@ -1016,22 +901,25 @@ class VideoImportService():
1016
901
  cleaned_filename = f"cleaned_{video_filename}"
1017
902
  cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
1018
903
 
904
+ processor_roi, endoscope_roi = self._get_processor_roi_info(video)
905
+
906
+ # Processor roi can be used later to OCR preknown regions.
907
+
1019
908
  # Clean video with ROI masking (heavy I/O operation)
1020
909
  actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
1021
- Path(raw_video_path),
1022
- self.current_video,
1023
- tmp_dir,
1024
- device_name,
1025
- endoscope_roi,
1026
- processor_roi,
1027
- cleaned_video_path
910
+ video_path=Path(raw_video_path),
911
+ video_file_obj=video,
912
+ device_name=device_name,
913
+ endoscope_roi=endoscope_roi,
914
+ output_path=cleaned_video_path,
915
+ technique="mask_overlay"
1028
916
  )
1029
917
 
1030
918
  # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
1031
919
  try:
1032
920
  # Prefer frames belonging to this video (UUID in path), else pick any frame
1033
921
  frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
1034
- video_uuid = str(self.current_video.uuid)
922
+ video_uuid = str(video.uuid)
1035
923
  filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
1036
924
  if filtered:
1037
925
  sample_frame = random.choice(filtered)
@@ -1069,10 +957,13 @@ class VideoImportService():
1069
957
  SAFETY MECHANISM: Only updates fields that are empty, default values, or explicitly marked as safe to overwrite.
1070
958
  This prevents accidentally overwriting valuable manually entered or previously extracted data.
1071
959
  """
1072
- if not (self.current_video.sensitive_meta and extracted_metadata):
960
+ video = self._require_current_video()
961
+ sensitive_meta = getattr(video, "sensitive_meta", None)
962
+
963
+ if not (sensitive_meta and extracted_metadata):
1073
964
  return
1074
-
1075
- sm = self.current_video.sensitive_meta
965
+
966
+ sm = sensitive_meta
1076
967
  updated_fields = []
1077
968
 
1078
969
  # Map extracted metadata to SensitiveMeta fields
@@ -1102,48 +993,71 @@ class VideoImportService():
1102
993
 
1103
994
  # Enhanced safety check: Only update if current value is safe to overwrite
1104
995
  if new_value and (old_value in SAFE_TO_OVERWRITE_VALUES):
1105
- self.logger.info(f"Updating {sm_field} from '{old_value}' to '{new_value}' for video {self.current_video.uuid}")
996
+ self.logger.info(
997
+ "Updating %s from '%s' to '%s' for video %s",
998
+ sm_field,
999
+ old_value,
1000
+ new_value,
1001
+ video.uuid,
1002
+ )
1106
1003
  setattr(sm, sm_field, new_value)
1107
1004
  updated_fields.append(sm_field)
1108
1005
  elif new_value and old_value and old_value not in SAFE_TO_OVERWRITE_VALUES:
1109
- self.logger.info(f"Preserving existing {sm_field} value '{old_value}' (not overwriting with '{new_value}') for video {self.current_video.uuid}")
1006
+ self.logger.info(
1007
+ "Preserving existing %s value '%s' (not overwriting with '%s') for video %s",
1008
+ sm_field,
1009
+ old_value,
1010
+ new_value,
1011
+ video.uuid,
1012
+ )
1110
1013
 
1111
1014
  if updated_fields:
1112
1015
  sm.save(update_fields=updated_fields)
1113
- self.logger.info(f"Updated SensitiveMeta fields for video {self.current_video.uuid}: {updated_fields}")
1114
-
1115
- # Mark sensitive meta as processed after successful update
1116
- self.current_video.state.mark_sensitive_meta_processed(save=True)
1117
- self.logger.info(f"Marked sensitive metadata as processed for video {self.current_video.uuid}")
1016
+ self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
1017
+
1018
+ state = video.get_or_create_state()
1019
+ state.mark_sensitive_meta_processed(save=True)
1020
+ self.logger.info("Marked sensitive metadata as processed for video %s", video.uuid)
1118
1021
  else:
1119
- self.logger.info(f"No SensitiveMeta fields updated for video {self.current_video.uuid} - all existing values preserved")
1022
+ self.logger.info("No SensitiveMeta fields updated for video %s - all existing values preserved", video.uuid)
1120
1023
 
1121
1024
  def _signal_completion(self):
1122
1025
  """Signal completion to the tracking system."""
1123
1026
  try:
1027
+ video = self._require_current_video()
1028
+
1029
+ raw_field: FieldFile | None = getattr(video, "raw_file", None)
1030
+ raw_exists = False
1031
+ if raw_field and getattr(raw_field, "path", None):
1032
+ try:
1033
+ raw_exists = Path(raw_field.path).exists()
1034
+ except (ValueError, OSError):
1035
+ raw_exists = False
1036
+
1124
1037
  video_processing_complete = (
1125
- self.current_video.sensitive_meta is not None and
1126
- self.current_video.video_meta is not None and
1127
- self.current_video.raw_file and
1128
- hasattr(self.current_video.raw_file, 'path') and
1129
- Path(self.current_video.raw_file.path).exists()
1038
+ video.sensitive_meta is not None and
1039
+ video.video_meta is not None and
1040
+ raw_exists
1130
1041
  )
1131
-
1042
+
1132
1043
  if video_processing_complete:
1133
- self.logger.info(f"Video {self.current_video.uuid} processing completed successfully - ready for validation")
1134
-
1044
+ self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
1045
+
1135
1046
  # Update completion flags if they exist
1136
1047
  completion_fields = []
1137
1048
  for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
1138
- if hasattr(self.current_video, field_name):
1139
- setattr(self.current_video, field_name, True)
1049
+ if hasattr(video, field_name):
1050
+ setattr(video, field_name, True)
1140
1051
  completion_fields.append(field_name)
1141
1052
 
1142
1053
  if completion_fields:
1143
- self.current_video.save(update_fields=completion_fields)
1144
- self.logger.info(f"Updated completion flags: {completion_fields}")
1054
+ video.save(update_fields=completion_fields)
1055
+ self.logger.info("Updated completion flags: %s", completion_fields)
1145
1056
  else:
1146
- self.logger.warning(f"Video {self.current_video.uuid} processing incomplete - missing required components")
1057
+ self.logger.warning(
1058
+ "Video %s processing incomplete - missing required components",
1059
+ video.uuid,
1060
+ )
1147
1061
 
1148
1062
  except Exception as e:
1149
1063
  self.logger.warning(f"Failed to signal completion status: {e}")
@@ -1186,6 +1100,9 @@ class VideoImportService():
1186
1100
  self.processed_files.remove(file_path_str)
1187
1101
  self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
1188
1102
 
1103
+
1104
+
1105
+
1189
1106
  except Exception as e:
1190
1107
  self.logger.warning(f"Error during context cleanup: {e}")
1191
1108
  finally:
@@ -1200,7 +1117,7 @@ def import_and_anonymize(
1200
1117
  processor_name: str,
1201
1118
  save_video: bool = True,
1202
1119
  delete_source: bool = False,
1203
- ) -> "VideoFile":
1120
+ ) -> VideoFile | None:
1204
1121
  """Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
1205
1122
  Kept for backward compatibility with callers that import this function directly.
1206
1123
  """