endoreg-db 0.8.2.2__py3-none-any.whl → 0.8.2.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. (The original page linked to more details here; the link target is not preserved in this text.)

@@ -18,18 +18,13 @@ from contextlib import contextmanager
18
18
  from pathlib import Path
19
19
  from typing import Union, Dict, Any, Optional
20
20
  from django.db import transaction
21
- from transformers.models.align.convert_align_tf_to_hf import get_processor
22
21
  from endoreg_db.models import VideoFile, SensitiveMeta
23
22
  from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
24
23
  import random
25
24
  from lx_anonymizer.ocr import trocr_full_image_ocr
26
25
  from endoreg_db.utils.hashs import get_video_hash
27
- from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
28
- from typing import TYPE_CHECKING
29
- from django.db.models.fields.files import FieldFile
26
+ from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
30
27
 
31
- if TYPE_CHECKING:
32
- from endoreg_db.models import EndoscopyProcessor
33
28
 
34
29
  # File lock configuration (matches PDF import)
35
30
  STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
@@ -64,18 +59,12 @@ class VideoImportService():
64
59
  self.STORAGE_DIR = STORAGE_DIR
65
60
 
66
61
  # Central video instance and processing context
67
- self.current_video: Optional[VideoFile] = None
62
+ self.current_video = None
68
63
  self.processing_context: Dict[str, Any] = {}
69
64
 
70
65
  self.delete_source = False
71
66
 
72
67
  self.logger = logging.getLogger(__name__)
73
-
74
- def _require_current_video(self) -> VideoFile:
75
- """Return the current VideoFile or raise if it has not been initialized."""
76
- if self.current_video is None:
77
- raise RuntimeError("Current video instance is not set")
78
- return self.current_video
79
68
 
80
69
  @contextmanager
81
70
  def _file_lock(self, path: Path):
@@ -167,9 +156,6 @@ class VideoImportService():
167
156
  return None
168
157
  raise
169
158
 
170
- # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
171
- self._create_sensitive_file()
172
-
173
159
  # Create or retrieve video instance
174
160
  self._create_or_retrieve_video_instance()
175
161
 
@@ -369,25 +355,27 @@ class VideoImportService():
369
355
 
370
356
  def _setup_processing_environment(self):
371
357
  """Setup the processing environment without file movement."""
372
- video = self._require_current_video()
373
-
358
+ # Ensure we have a valid video instance
359
+ if not self.current_video:
360
+ raise RuntimeError("No video instance available for processing environment setup")
361
+
374
362
  # Initialize video specifications
375
- video.initialize_video_specs()
376
-
363
+ self.current_video.initialize_video_specs()
364
+
377
365
  # Initialize frame objects in database
378
- video.initialize_frames()
366
+ self.current_video.initialize_frames()
379
367
 
380
368
  # Extract frames BEFORE processing to prevent pipeline 1 conflicts
381
369
  self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
382
370
  try:
383
- frames_extracted = video.extract_frames(overwrite=False)
371
+ frames_extracted = self.current_video.extract_frames(overwrite=False)
384
372
  if frames_extracted:
385
373
  self.processing_context['frames_extracted'] = True
386
374
  self.logger.info("Frame extraction completed successfully")
387
375
 
388
376
  # CRITICAL: Immediately save the frames_extracted state to database
389
377
  # to prevent refresh_from_db() in pipeline 1 from overriding it
390
- state = video.get_or_create_state()
378
+ state = self.current_video.get_or_create_state()
391
379
  if not state.frames_extracted:
392
380
  state.frames_extracted = True
393
381
  state.save(update_fields=['frames_extracted'])
@@ -400,7 +388,7 @@ class VideoImportService():
400
388
  self.processing_context['frames_extracted'] = False
401
389
 
402
390
  # Ensure default patient data
403
- self._ensure_default_patient_data(video_instance=video)
391
+ self._ensure_default_patient_data()
404
392
 
405
393
  self.logger.info("Processing environment setup completed")
406
394
 
@@ -408,12 +396,11 @@ class VideoImportService():
408
396
  """Process frames and extract metadata with anonymization."""
409
397
  # Check frame cleaning availability
410
398
  frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
411
- video = self._require_current_video()
412
-
413
- raw_file_field = video.raw_file
414
- has_raw_file = isinstance(raw_file_field, FieldFile) and bool(raw_file_field.name)
399
+
400
+ _current_video = self.current_video
401
+ assert _current_video is not None, "Current video instance is None during frame processing"
415
402
 
416
- if not (frame_cleaning_available and has_raw_file):
403
+ if not (frame_cleaning_available and _current_video.raw_file):
417
404
  self.logger.warning("Frame cleaning not available or conditions not met, using fallback anonymization.")
418
405
  self._fallback_anonymize_video()
419
406
  return
@@ -440,20 +427,14 @@ class VideoImportService():
440
427
  raw_video_path = self.processing_context.get('raw_video_path')
441
428
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
442
429
  grace_seconds = 60
443
- expected_cleaned_path: Optional[Path] = None
444
- processed_field = video.processed_file
445
- if isinstance(processed_field, FieldFile) and processed_field.name:
446
- try:
447
- expected_cleaned_path = Path(processed_field.path)
448
- except (NotImplementedError, TypeError, ValueError):
449
- expected_cleaned_path = None
430
+ expected_cleaned = self.current_video.processed_file
450
431
  found = False
451
- if expected_cleaned_path is not None:
432
+ if expected_cleaned is not None:
452
433
  for _ in range(grace_seconds):
453
- if expected_cleaned_path.exists():
454
- self.processing_context['cleaned_video_path'] = expected_cleaned_path
434
+ if expected_cleaned.exists():
435
+ self.processing_context['cleaned_video_path'] = expected_cleaned
455
436
  self.processing_context['anonymization_completed'] = True
456
- self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
437
+ self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned)
457
438
  found = True
458
439
  break
459
440
  time.sleep(1)
@@ -461,7 +442,7 @@ class VideoImportService():
461
442
  self._fallback_anonymize_video()
462
443
  if not found:
463
444
  raise TimeoutError("Frame cleaning operation timed out - likely Ollama connection issue")
464
-
445
+
465
446
  except Exception as e:
466
447
  self.logger.warning("Frame cleaning failed (reason: %s), falling back to simple copy", e)
467
448
  # Try fallback anonymization when frame cleaning fails
@@ -474,20 +455,17 @@ class VideoImportService():
474
455
  self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
475
456
 
476
457
  def _save_anonymized_video(self):
477
- video = self._require_current_video()
478
- anonymized_video_path = video.get_target_anonymized_video_path()
479
-
458
+ anonymized_video_path = self.current_video.get_target_anonymized_video_path()
459
+
480
460
  if not anonymized_video_path.exists():
481
- raise RuntimeError(f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}")
461
+ raise RuntimeError(f"Processed video file not found after assembly for {self.current_video.uuid}: {anonymized_video_path}")
482
462
 
483
463
  new_processed_hash = get_video_hash(anonymized_video_path)
484
- if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
485
- raise ValueError(
486
- f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
487
- )
464
+ if type(self.current_video).objects.filter(processed_video_hash=new_processed_hash).exclude(pk=self.current_video.pk).exists():
465
+ raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {self.current_video.uuid}).")
488
466
 
489
- video.processed_video_hash = new_processed_hash
490
- video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
467
+ self.current_video.processed_video_hash = new_processed_hash
468
+ self.current_video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
491
469
 
492
470
  update_fields = [
493
471
  "processed_video_hash",
@@ -496,23 +474,22 @@ class VideoImportService():
496
474
  ]
497
475
 
498
476
  if self.delete_source:
499
- original_raw_file_path_to_delete = video.get_raw_file_path()
500
- original_raw_frame_dir_to_delete = video.get_frame_dir_path()
477
+ original_raw_file_path_to_delete = self.current_video.get_raw_file_path()
478
+ original_raw_frame_dir_to_delete = self.current_video.get_frame_dir_path()
501
479
 
502
- video.raw_file.name = None # type: ignore[assignment]
480
+ self.current_video.raw_file.name = None
503
481
 
504
482
  update_fields.extend(["raw_file", "video_hash"])
505
483
 
506
484
  transaction.on_commit(lambda: _cleanup_raw_assets(
507
- video_uuid=video.uuid,
485
+ video_uuid=self.current_video.uuid,
508
486
  raw_file_path=original_raw_file_path_to_delete,
509
487
  raw_frame_dir=original_raw_frame_dir_to_delete
510
488
  ))
511
489
 
512
- video.save(update_fields=update_fields)
513
- video.state.mark_anonymized(save=True)
514
- video.refresh_from_db()
515
- self.current_video = video
490
+ self.current_video.save(update_fields=update_fields)
491
+ self.current_video.state.mark_anonymized(save=True)
492
+ self.current_video.refresh_from_db()
516
493
  return True
517
494
 
518
495
  def _fallback_anonymize_video(self):
@@ -521,23 +498,23 @@ class VideoImportService():
521
498
  """
522
499
  try:
523
500
  self.logger.info("Attempting fallback video anonymization...")
524
- video = self.current_video
525
- if video is None:
526
- self.logger.warning("No VideoFile instance available for fallback anonymization")
527
- else:
501
+ if self.current_video:
528
502
  # Try VideoFile.pipe_2() method if available
529
- if hasattr(video, 'pipe_2'):
503
+ if hasattr(self.current_video, 'pipe_2'):
530
504
  self.logger.info("Trying VideoFile.pipe_2() method...")
531
- if video.pipe_2():
505
+ if self.current_video.pipe_2():
532
506
  self.logger.info("VideoFile.pipe_2() succeeded")
533
507
  self.processing_context['anonymization_completed'] = True
534
508
  return
535
- self.logger.warning("VideoFile.pipe_2() returned False")
509
+ else:
510
+ self.logger.warning("VideoFile.pipe_2() returned False")
536
511
  # Try direct anonymization via _anonymize
537
- if _anonymize(video, delete_original_raw=self.delete_source):
512
+ if _anonymize(self.current_video, delete_original_raw=self.delete_source):
538
513
  self.logger.info("VideoFile._anonymize() succeeded")
539
514
  self.processing_context['anonymization_completed'] = True
540
515
  return
516
+ else:
517
+ self.logger.warning("No VideoFile instance available for fallback anonymization")
541
518
 
542
519
  # Strategy 2: Simple copy (no processing, just copy raw to processed)
543
520
  self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
@@ -547,19 +524,33 @@ class VideoImportService():
547
524
  except Exception as e:
548
525
  self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
549
526
  self.processing_context['anonymization_completed'] = False
550
- self.processing_context['error_reason'] = str(e)
527
+ self.processing_context['error_reason']
551
528
  def _finalize_processing(self):
552
529
  """Finalize processing and update video state."""
553
530
  self.logger.info("Updating video processing state...")
554
531
 
555
532
  with transaction.atomic():
556
- video = self._require_current_video()
557
- try:
558
- video.refresh_from_db()
559
- except Exception as refresh_error:
560
- self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
533
+ # Update basic processing states
534
+ # Ensure state exists before accessing it
561
535
 
562
- state = video.get_or_create_state()
536
+ if not self.current_video:
537
+ try:
538
+ self.current_video.refresh_from_db()
539
+ except Exception as e:
540
+ self.logger.error(f"Failed to refresh current_video from DB: {e}")
541
+ if not self.current_video:
542
+ raise RuntimeError("No current video instance available for finalization")
543
+
544
+ if not self.current_video.processed_file:
545
+ self.logger.warning("No processed file available for current video")
546
+ self.current_video.processed_file = None # Ensure field is not None
547
+ self.current_video.mark_sensitive_meta_processed = False
548
+ else:
549
+ self.current_video.mark_sensitive_meta_processed = True
550
+
551
+ state = self.current_video.get_or_create_state()
552
+ if not state:
553
+ raise RuntimeError("Failed to get or create video state")
563
554
 
564
555
  # Only mark frames as extracted if they were successfully extracted
565
556
  if self.processing_context.get('frames_extracted', False):
@@ -588,7 +579,10 @@ class VideoImportService():
588
579
 
589
580
  # Save all state changes
590
581
  state.save()
591
- self.logger.info("Video processing state updated")
582
+ self.logger.info("Video processing state updated")
583
+ # Save all state changes
584
+ self.current_video.state.save()
585
+ self.current_video.save()
592
586
 
593
587
  # Signal completion
594
588
  self._signal_completion()
@@ -596,48 +590,59 @@ class VideoImportService():
596
590
  def _cleanup_and_archive(self):
597
591
  """Move processed video to anonym_videos and cleanup."""
598
592
  from endoreg_db.utils import data_paths
599
-
593
+
594
+ # Define target directory for processed videos
600
595
  anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
601
596
  anonym_videos_dir.mkdir(parents=True, exist_ok=True)
602
-
603
- video = self._require_current_video()
604
-
597
+
598
+ # Check if we have a processed/cleaned video
605
599
  processed_video_path = None
600
+
601
+ # Look for cleaned video from frame cleaning process
606
602
  if 'cleaned_video_path' in self.processing_context:
607
603
  processed_video_path = self.processing_context['cleaned_video_path']
608
604
  else:
605
+ # If no processing occurred, copy from raw video location
609
606
  raw_video_path = self.processing_context.get('raw_video_path')
610
607
  if raw_video_path and Path(raw_video_path).exists():
611
608
  video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
612
609
  processed_filename = f"processed_{video_filename}"
613
610
  processed_video_path = Path(raw_video_path).parent / processed_filename
611
+
612
+ # Copy raw to processed location (will be moved to anonym_videos)
614
613
  try:
615
614
  shutil.copy2(str(raw_video_path), str(processed_video_path))
616
615
  self.logger.info("Copied raw video for processing: %s", processed_video_path)
617
- except Exception as exc:
618
- self.logger.error("Failed to copy raw video: %s", exc)
619
- processed_video_path = None
620
-
616
+ except Exception as e:
617
+ self.logger.error("Failed to copy raw video: %s", e)
618
+ processed_video_path = None # FIXED: Don't use raw as fallback
619
+
620
+ # Move processed video to anonym_videos ONLY if it exists
621
621
  if processed_video_path and Path(processed_video_path).exists():
622
622
  try:
623
+ # ✅ Clean filename: no original filename leakage
623
624
  ext = Path(processed_video_path).suffix or ".mp4"
624
- anonym_video_filename = f"anonym_{video.uuid}{ext}"
625
+ anonym_video_filename = f"anonym_{self.current_video.uuid}{ext}"
625
626
  anonym_target_path = anonym_videos_dir / anonym_video_filename
626
627
 
628
+ # Move processed video to anonym_videos/
627
629
  shutil.move(str(processed_video_path), str(anonym_target_path))
628
630
  self.logger.info("Moved processed video to: %s", anonym_target_path)
629
631
 
632
+ # Verify the file actually exists before updating database
630
633
  if anonym_target_path.exists():
631
634
  try:
632
635
  storage_root = data_paths["storage"]
633
636
  relative_path = anonym_target_path.relative_to(storage_root)
634
- video.processed_file.name = str(relative_path)
635
- video.save(update_fields=["processed_file"])
637
+ # Save relative path (e.g. anonym_videos/anonym_<uuid>.mp4)
638
+ self.current_video.processed_file.name = str(relative_path)
639
+ self.current_video.save(update_fields=["processed_file"])
636
640
  self.logger.info("Updated processed_file path to: %s", relative_path)
637
- except Exception as exc:
638
- self.logger.error("Failed to update processed_file path: %s", exc)
639
- video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
640
- video.save(update_fields=['processed_file'])
641
+ except Exception as e:
642
+ self.logger.error("Failed to update processed_file path: %s", e)
643
+ # Fallback to simple relative path
644
+ self.current_video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
645
+ self.current_video.save(update_fields=['processed_file'])
641
646
  self.logger.info(
642
647
  "Updated processed_file path using fallback: %s",
643
648
  f"anonym_videos/{anonym_video_filename}",
@@ -646,194 +651,277 @@ class VideoImportService():
646
651
  self.processing_context['anonymization_completed'] = True
647
652
  else:
648
653
  self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
649
- except Exception as exc:
650
- self.logger.error("Failed to move processed video to anonym_videos: %s", exc)
654
+ except Exception as e:
655
+ self.logger.error("Failed to move processed video to anonym_videos: %s", e)
651
656
  else:
652
657
  self.logger.warning("No processed video available - processed_file will remain empty")
653
-
658
+ # Leave processed_file empty/null - frontend should fall back to raw_file
659
+
660
+ # Cleanup temporary directories
654
661
  try:
655
662
  from endoreg_db.utils.paths import RAW_FRAME_DIR
656
663
  shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
657
664
  self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
658
- except Exception as exc:
659
- self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
660
-
665
+ except Exception as e:
666
+ self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, e)
667
+
668
+ # Handle source file deletion - this should already be moved, but check raw_videos
661
669
  source_path = self.processing_context['file_path']
662
670
  if self.processing_context['delete_source'] and Path(source_path).exists():
663
671
  try:
664
672
  os.remove(source_path)
665
673
  self.logger.info("Removed remaining source file: %s", source_path)
666
- except Exception as exc:
667
- self.logger.warning("Failed to remove source file %s: %s", source_path, exc)
668
-
669
- if not video.processed_file or not Path(video.processed_file.path).exists():
670
- self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
671
- try:
672
- video.anonymize(delete_original_raw=self.delete_source)
673
- video.save(update_fields=['processed_file'])
674
- self.logger.info("Late-stage anonymization succeeded")
675
674
  except Exception as e:
676
- self.logger.error("Late-stage anonymization failed: %s", e)
677
- self.processing_context['anonymization_completed'] = False
678
-
675
+ self.logger.warning("Failed to remove source file %s: %s", source_path, e)
676
+
677
+ # Check if processed video exists and otherwise call anonymize
678
+
679
+ if not self.current_video.processed_file or not Path(self.current_video.processed_file.path).exists():
680
+ self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
681
+ self.current_video.anonymize(delete_original_raw=self.delete_source)
682
+ self.current_video.save(update_fields=['processed_file'])
683
+
684
+
679
685
  self.logger.info("Cleanup and archiving completed")
680
-
686
+
687
+
688
+
689
+ # Mark as processed (in-memory tracking)
681
690
  self.processed_files.add(str(self.processing_context['file_path']))
682
-
691
+
692
+ # Refresh from database and finalize state
683
693
  with transaction.atomic():
684
- video.refresh_from_db()
685
- if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
686
- video.state.mark_sensitive_meta_processed(save=True)
687
-
688
- self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
694
+ self.current_video.refresh_from_db()
695
+ if hasattr(self.current_video, 'state') and self.processing_context.get('anonymization_completed'):
696
+ self.current_video.state.mark_sensitive_meta_processed(save=True)
697
+
698
+
699
+ self.logger.info("Import and anonymization completed for VideoFile UUID: %s", self.current_video.uuid)
689
700
  self.logger.info("Raw video stored in: /data/videos")
690
701
  self.logger.info("Processed video stored in: /data/anonym_videos")
691
702
 
692
- def _create_sensitive_file(
693
- self,
694
- video_instance: VideoFile | None = None,
695
- file_path: Path | str | None = None,
696
- ) -> Path:
697
- """Create or move a sensitive copy of the raw video file inside storage."""
703
+ def _create_sensitive_file(self, video_instance: "VideoFile" = None, file_path: Union[Path, str] = None) -> Path:
704
+ """
705
+ Create a sensitive file for the given video file by copying the original file and updating the path.
706
+ Uses the central video instance and processing context if parameters not provided.
698
707
 
699
- video = video_instance or self._require_current_video()
708
+ Args:
709
+ video_instance: Optional video instance, defaults to self.current_video
710
+ file_path: Optional file path, defaults to processing_context['file_path']
700
711
 
701
- raw_field: FieldFile | None = getattr(video, "raw_file", None)
702
- source_path: Path | None = None
712
+ Returns:
713
+ Path: The path to the created sensitive file.
714
+ """
715
+ video_file = video_instance or self.current_video
716
+ # Always use the currently stored raw file path from the model to avoid deleting external source assets
717
+ source_path = None
703
718
  try:
704
- if raw_field and raw_field.path:
705
- source_path = Path(raw_field.path)
719
+ if video_file and hasattr(video_file, 'raw_file') and video_file.raw_file and hasattr(video_file.raw_file, 'path'):
720
+ source_path = Path(video_file.raw_file.path)
706
721
  except Exception:
707
722
  source_path = None
708
-
723
+ # Fallback only if explicitly provided (do NOT default to processing_context input file)
709
724
  if source_path is None and file_path is not None:
710
725
  source_path = Path(file_path)
711
-
712
- if source_path is None:
726
+
727
+ if not video_file:
728
+ raise ValueError("No video instance available for creating sensitive file")
729
+ if not source_path:
713
730
  raise ValueError("No file path available for creating sensitive file")
714
- if not raw_field:
731
+
732
+ if not video_file.raw_file:
715
733
  raise ValueError("VideoFile must have a raw_file to create a sensitive file")
716
-
717
- target_dir = VIDEO_DIR / "sensitive"
734
+
735
+ # Ensure the target directory exists
736
+ target_dir = VIDEO_DIR / 'sensitive'
718
737
  if not target_dir.exists():
719
- self.logger.info("Creating sensitive file directory: %s", target_dir)
738
+ self.logger.info(f"Creating sensitive file directory: {target_dir}")
720
739
  os.makedirs(target_dir, exist_ok=True)
721
-
740
+
741
+ # Move the stored raw file into the sensitive directory within storage
722
742
  target_file_path = target_dir / source_path.name
723
743
  try:
744
+ # Prefer a move within the storage to avoid extra disk usage. This does not touch external input files.
724
745
  shutil.move(str(source_path), str(target_file_path))
725
- self.logger.info("Moved raw file to sensitive directory: %s", target_file_path)
726
- except Exception as exc:
727
- self.logger.warning("Failed to move raw file to sensitive dir, copying instead: %s", exc)
746
+ self.logger.info(f"Moved raw file to sensitive directory: {target_file_path}")
747
+ except Exception as e:
748
+ # Fallback to copy if move fails (e.g., cross-device or permissions), then remove only the original stored raw file
749
+ self.logger.warning(f"Failed to move raw file to sensitive dir, copying instead: {e}")
728
750
  shutil.copy(str(source_path), str(target_file_path))
729
751
  try:
752
+ # Remove only the stored raw file copy; never touch external input paths here
730
753
  os.remove(source_path)
731
754
  except FileNotFoundError:
732
755
  pass
733
-
756
+
757
+ # Update the model to point to the sensitive file location
758
+ # Use relative path from storage root, like in create_from_file.py
734
759
  try:
735
760
  from endoreg_db.utils import data_paths
736
-
737
761
  storage_root = data_paths["storage"]
738
762
  relative_path = target_file_path.relative_to(storage_root)
739
- video.raw_file.name = str(relative_path)
740
- video.save(update_fields=["raw_file"])
741
- self.logger.info("Updated video.raw_file to point to sensitive location: %s", relative_path)
742
- except Exception as exc:
743
- self.logger.warning("Failed to set relative path, using fallback: %s", exc)
744
- video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
745
- video.save(update_fields=["raw_file"])
746
- self.logger.info(
747
- "Updated video.raw_file using fallback method: videos/sensitive/%s",
748
- target_file_path.name,
749
- )
750
-
751
- self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
763
+ video_file.raw_file.name = str(relative_path)
764
+ video_file.save(update_fields=['raw_file'])
765
+ self.logger.info(f"Updated video.raw_file to point to sensitive location: {relative_path}")
766
+ except Exception as e:
767
+ # Fallback to absolute path conversion if relative path fails
768
+ self.logger.warning(f"Failed to set relative path, using fallback: {e}")
769
+ video_file.raw_file.name = f"videos/sensitive/{target_file_path.name}"
770
+ video_file.save(update_fields=['raw_file'])
771
+ self.logger.info(f"Updated video.raw_file using fallback method: videos/sensitive/{target_file_path.name}")
772
+
773
+ # Important: Do NOT remove the original input asset passed to the service here.
774
+ # Source file cleanup for external inputs is handled by create_from_file via delete_source flag.
775
+
776
+ self.logger.info(f"Created sensitive file for {video_file.uuid} at {target_file_path}")
752
777
  return target_file_path
753
778
 
779
+
780
+
781
+
782
+ def _ensure_frame_cleaning_available(self):
783
+ """
784
+ Ensure frame cleaning modules are available by adding lx-anonymizer to path.
785
+
786
+ Returns:
787
+ Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
788
+ """
789
+ try:
790
+ # Check if we can find the lx-anonymizer directory
791
+ from importlib import resources
792
+ lx_anonymizer_path = resources.files("lx_anonymizer")
793
+
794
+ if lx_anonymizer_path.exists():
795
+ # Add to Python path temporarily
796
+ if str(lx_anonymizer_path) not in sys.path:
797
+ sys.path.insert(0, str(lx_anonymizer_path))
798
+
799
+ # Try simple import
800
+ from lx_anonymizer import FrameCleaner, ReportReader
801
+
802
+ self.logger.info("Successfully imported lx_anonymizer modules")
803
+
804
+ # Remove from path to avoid conflicts
805
+ if str(lx_anonymizer_path) in sys.path:
806
+ sys.path.remove(str(lx_anonymizer_path))
807
+
808
+ return True, FrameCleaner, ReportReader
809
+
810
+ else:
811
+ self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
812
+
813
+ except Exception as e:
814
+ self.logger.warning(f"Frame cleaning not available: {e}")
815
+
816
+ return False, None, None
817
+
754
818
  def _get_processor_roi_info(self):
755
819
  """Get processor ROI information for masking."""
756
820
  processor_roi = None
757
821
  endoscope_roi = None
758
-
759
- video = self._require_current_video()
760
-
822
+
761
823
  try:
762
- video_meta = getattr(video, "video_meta", None)
763
- processor = getattr(video_meta, "processor", None) if video_meta else None
764
- if processor:
765
- assert isinstance(processor, EndoscopyProcessor), "Processor is not of type EndoscopyProcessor"
824
+ if self.current_video.video_meta and self.current_video.video_meta.processor:
825
+ processor = getattr(self.current_video.video_meta, "processor", None)
826
+
827
+ # Get the endoscope ROI for masking
766
828
  endoscope_roi = processor.get_roi_endoscope_image()
829
+
830
+ # Get all processor ROIs for comprehensive masking
767
831
  processor_roi = {
768
- "endoscope_image": endoscope_roi,
769
- "patient_first_name": processor.get_roi_patient_first_name(),
770
- "patient_last_name": processor.get_roi_patient_last_name(),
771
- "patient_dob": processor.get_roi_patient_dob(),
772
- "examination_date": processor.get_roi_examination_date(),
773
- "examination_time": processor.get_roi_examination_time(),
774
- "endoscope_type": processor.get_roi_endoscope_type(),
775
- "endoscopy_sn": processor.get_roi_endoscopy_sn(),
832
+ 'endoscope_image': endoscope_roi,
833
+ 'patient_first_name': processor.get_roi_patient_first_name(),
834
+ 'patient_last_name': processor.get_roi_patient_last_name(),
835
+ 'patient_dob': processor.get_roi_patient_dob(),
836
+ 'examination_date': processor.get_roi_examination_date(),
837
+ 'examination_time': processor.get_roi_examination_time(),
838
+ 'endoscope_type': processor.get_roi_endoscope_type(),
839
+ 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
776
840
  }
777
- self.logger.info("Retrieved processor ROI information: endoscope_roi=%s", endoscope_roi)
841
+
842
+ self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
778
843
  else:
779
- self.logger.warning(
780
- "No processor found for video %s, proceeding without ROI masking",
781
- video.uuid,
782
- )
783
- except Exception as exc:
784
- self.logger.error("Failed to retrieve processor ROI information: %s", exc)
785
-
844
+ self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
845
+
846
+ except Exception as e:
847
+ self.logger.error(f"Failed to retrieve processor ROI information: {e}")
848
+ # Continue without ROI - don't fail the entire import process
849
+
786
850
  return processor_roi, endoscope_roi
787
851
 
788
- def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
789
- """Ensure minimum patient data is present on the video's SensitiveMeta."""
790
-
791
- video = video_instance or self._require_current_video()
792
852
 
793
- sensitive_meta = getattr(video, "sensitive_meta", None)
794
- if not sensitive_meta:
795
- self.logger.info("No SensitiveMeta found for video %s, creating default", video.uuid)
853
+ def _ensure_default_patient_data(self, video_instance: "VideoFile" = None) -> None:
854
+ """
855
+ Ensure video has minimum required patient data in SensitiveMeta.
856
+ Creates default values if data is missing after OCR processing.
857
+ Uses the central video instance if parameter not provided.
858
+
859
+ Args:
860
+ video_instance: Optional video instance, defaults to self.current_video
861
+ """
862
+ video_file = video_instance or self.current_video
863
+
864
+ if not video_file:
865
+ raise ValueError("No video instance available for ensuring patient data")
866
+
867
+ if not video_file.sensitive_meta:
868
+ self.logger.info(f"No SensitiveMeta found for video {video_file.uuid}, creating default")
869
+
870
+ # Create default SensitiveMeta with placeholder data
796
871
  default_data = {
797
872
  "patient_first_name": "Patient",
798
- "patient_last_name": "Unknown",
799
- "patient_dob": date(1990, 1, 1),
873
+ "patient_last_name": "Unknown",
874
+ "patient_dob": date(1990, 1, 1), # Default DOB
800
875
  "examination_date": date.today(),
801
- "center_name": video.center.name if video.center else "university_hospital_wuerzburg",
876
+ "center_name": video_file.center.name if video_file.center else "university_hospital_wuerzburg"
802
877
  }
878
+
803
879
  try:
804
880
  sensitive_meta = SensitiveMeta.create_from_dict(default_data)
805
- video.sensitive_meta = sensitive_meta
806
- video.save(update_fields=["sensitive_meta"])
807
- state = video.get_or_create_state()
881
+ video_file.sensitive_meta = sensitive_meta
882
+ video_file.save(update_fields=['sensitive_meta'])
883
+
884
+ # Mark sensitive meta as processed after creating default data
885
+ state = video_file.get_or_create_state()
808
886
  state.mark_sensitive_meta_processed(save=True)
809
- self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
810
- except Exception as exc:
811
- self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
887
+
888
+ self.logger.info(f"Created default SensitiveMeta for video {video_file.uuid}")
889
+ except Exception as e:
890
+ self.logger.error(f"Failed to create default SensitiveMeta for video {video_file.uuid}: {e}")
812
891
  return
892
+
813
893
  else:
814
- update_data: Dict[str, Any] = {}
815
- if not sensitive_meta.patient_first_name:
894
+ # Update existing SensitiveMeta with missing fields
895
+ update_needed = False
896
+ update_data = {}
897
+
898
+ if not video_file.sensitive_meta.patient_first_name:
816
899
  update_data["patient_first_name"] = "Patient"
817
- if not sensitive_meta.patient_last_name:
900
+ update_needed = True
901
+
902
+ if not video_file.sensitive_meta.patient_last_name:
818
903
  update_data["patient_last_name"] = "Unknown"
819
- if not sensitive_meta.patient_dob:
904
+ update_needed = True
905
+
906
+ if not video_file.sensitive_meta.patient_dob:
820
907
  update_data["patient_dob"] = date(1990, 1, 1)
821
- if not sensitive_meta.examination_date:
908
+ update_needed = True
909
+
910
+ if not video_file.sensitive_meta.examination_date:
822
911
  update_data["examination_date"] = date.today()
823
-
824
- if update_data:
912
+ update_needed = True
913
+
914
+ if update_needed:
825
915
  try:
826
- sensitive_meta.update_from_dict(update_data)
827
- state = video.get_or_create_state()
916
+ video_file.sensitive_meta.update_from_dict(update_data)
917
+
918
+ # Mark sensitive meta as processed after updating missing fields
919
+ state = video_file.get_or_create_state()
828
920
  state.mark_sensitive_meta_processed(save=True)
829
- self.logger.info(
830
- "Updated missing SensitiveMeta fields for video %s: %s",
831
- video.uuid,
832
- list(update_data.keys()),
833
- )
834
- except Exception as exc:
835
- self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
836
-
921
+
922
+ self.logger.info(f"Updated missing SensitiveMeta fields for video {video_file.uuid}: {list(update_data.keys())}")
923
+ except Exception as e:
924
+ self.logger.error(f"Failed to update SensitiveMeta for video {video_file.uuid}: {e}")
837
925
 
838
926
 
839
927
  def _ensure_frame_cleaning_available(self):
@@ -847,9 +935,6 @@ class VideoImportService():
847
935
  # Check if we can find the lx-anonymizer directory
848
936
  from importlib import resources
849
937
  lx_anonymizer_path = resources.files("lx_anonymizer")
850
-
851
- # make sure lx_anonymizer_path is a Path object
852
- lx_anonymizer_path = Path(str(lx_anonymizer_path))
853
938
 
854
939
  if lx_anonymizer_path.exists():
855
940
  # Add to Python path temporarily
@@ -875,7 +960,39 @@ class VideoImportService():
875
960
 
876
961
  return False, None, None
877
962
 
878
-
963
+ def _get_processor_roi_info(self):
964
+ """Get processor ROI information for masking."""
965
+ processor_roi = None
966
+ endoscope_roi = None
967
+
968
+ try:
969
+ if self.current_video.video_meta and self.current_video.video_meta.processor:
970
+ processor = getattr(self.current_video.video_meta, "processor", None)
971
+
972
+ # Get the endoscope ROI for masking
973
+ endoscope_roi = processor.get_roi_endoscope_image()
974
+
975
+ # Get all processor ROIs for comprehensive masking
976
+ processor_roi = {
977
+ 'endoscope_image': endoscope_roi,
978
+ 'patient_first_name': processor.get_roi_patient_first_name(),
979
+ 'patient_last_name': processor.get_roi_patient_last_name(),
980
+ 'patient_dob': processor.get_roi_patient_dob(),
981
+ 'examination_date': processor.get_roi_examination_date(),
982
+ 'examination_time': processor.get_roi_examination_time(),
983
+ 'endoscope_type': processor.get_roi_endoscope_type(),
984
+ 'endoscopy_sn': processor.get_roi_endoscopy_sn(),
985
+ }
986
+
987
+ self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
988
+ else:
989
+ self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
990
+
991
+ except Exception as e:
992
+ self.logger.error(f"Failed to retrieve processor ROI information: {e}")
993
+ # Continue without ROI - don't fail the entire import process
994
+
995
+ return processor_roi, endoscope_roi
879
996
 
880
997
  def _perform_frame_cleaning(self, FrameCleaner, processor_roi, endoscope_roi):
881
998
  """Perform frame cleaning and anonymization."""
@@ -889,9 +1006,7 @@ class VideoImportService():
889
1006
  raise RuntimeError(f"Raw video path not found: {raw_video_path}")
890
1007
 
891
1008
  # Get processor name safely
892
- video = self._require_current_video()
893
- video_meta = getattr(video, "video_meta", None)
894
- processor = getattr(video_meta, "processor", None) if video_meta else None
1009
+ processor = getattr(self.current_video.video_meta, "processor", None) if self.current_video.video_meta else None
895
1010
  device_name = processor.name if processor else self.processing_context['processor_name']
896
1011
 
897
1012
  tmp_dir = RAW_FRAME_DIR
@@ -901,25 +1016,22 @@ class VideoImportService():
901
1016
  cleaned_filename = f"cleaned_{video_filename}"
902
1017
  cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
903
1018
 
904
- processor_roi, endoscope_roi = self._get_processor_roi_info(video)
905
-
906
- # Processor roi can be used later to OCR preknown regions.
907
-
908
1019
  # Clean video with ROI masking (heavy I/O operation)
909
1020
  actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
910
- video_path=Path(raw_video_path),
911
- video_file_obj=video,
912
- device_name=device_name,
913
- endoscope_roi=endoscope_roi,
914
- output_path=cleaned_video_path,
915
- technique="mask_overlay"
1021
+ Path(raw_video_path),
1022
+ self.current_video,
1023
+ tmp_dir,
1024
+ device_name,
1025
+ endoscope_roi,
1026
+ processor_roi,
1027
+ cleaned_video_path
916
1028
  )
917
1029
 
918
1030
  # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
919
1031
  try:
920
1032
  # Prefer frames belonging to this video (UUID in path), else pick any frame
921
1033
  frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
922
- video_uuid = str(video.uuid)
1034
+ video_uuid = str(self.current_video.uuid)
923
1035
  filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
924
1036
  if filtered:
925
1037
  sample_frame = random.choice(filtered)
@@ -957,13 +1069,10 @@ class VideoImportService():
957
1069
  SAFETY MECHANISM: Only updates fields that are empty, default values, or explicitly marked as safe to overwrite.
958
1070
  This prevents accidentally overwriting valuable manually entered or previously extracted data.
959
1071
  """
960
- video = self._require_current_video()
961
- sensitive_meta = getattr(video, "sensitive_meta", None)
962
-
963
- if not (sensitive_meta and extracted_metadata):
1072
+ if not (self.current_video.sensitive_meta and extracted_metadata):
964
1073
  return
965
-
966
- sm = sensitive_meta
1074
+
1075
+ sm = self.current_video.sensitive_meta
967
1076
  updated_fields = []
968
1077
 
969
1078
  # Map extracted metadata to SensitiveMeta fields
@@ -993,71 +1102,48 @@ class VideoImportService():
993
1102
 
994
1103
  # Enhanced safety check: Only update if current value is safe to overwrite
995
1104
  if new_value and (old_value in SAFE_TO_OVERWRITE_VALUES):
996
- self.logger.info(
997
- "Updating %s from '%s' to '%s' for video %s",
998
- sm_field,
999
- old_value,
1000
- new_value,
1001
- video.uuid,
1002
- )
1105
+ self.logger.info(f"Updating {sm_field} from '{old_value}' to '{new_value}' for video {self.current_video.uuid}")
1003
1106
  setattr(sm, sm_field, new_value)
1004
1107
  updated_fields.append(sm_field)
1005
1108
  elif new_value and old_value and old_value not in SAFE_TO_OVERWRITE_VALUES:
1006
- self.logger.info(
1007
- "Preserving existing %s value '%s' (not overwriting with '%s') for video %s",
1008
- sm_field,
1009
- old_value,
1010
- new_value,
1011
- video.uuid,
1012
- )
1109
+ self.logger.info(f"Preserving existing {sm_field} value '{old_value}' (not overwriting with '{new_value}') for video {self.current_video.uuid}")
1013
1110
 
1014
1111
  if updated_fields:
1015
1112
  sm.save(update_fields=updated_fields)
1016
- self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
1017
-
1018
- state = video.get_or_create_state()
1019
- state.mark_sensitive_meta_processed(save=True)
1020
- self.logger.info("Marked sensitive metadata as processed for video %s", video.uuid)
1113
+ self.logger.info(f"Updated SensitiveMeta fields for video {self.current_video.uuid}: {updated_fields}")
1114
+
1115
+ # Mark sensitive meta as processed after successful update
1116
+ self.current_video.state.mark_sensitive_meta_processed(save=True)
1117
+ self.logger.info(f"Marked sensitive metadata as processed for video {self.current_video.uuid}")
1021
1118
  else:
1022
- self.logger.info("No SensitiveMeta fields updated for video %s - all existing values preserved", video.uuid)
1119
+ self.logger.info(f"No SensitiveMeta fields updated for video {self.current_video.uuid} - all existing values preserved")
1023
1120
 
1024
1121
  def _signal_completion(self):
1025
1122
  """Signal completion to the tracking system."""
1026
1123
  try:
1027
- video = self._require_current_video()
1028
-
1029
- raw_field: FieldFile | None = getattr(video, "raw_file", None)
1030
- raw_exists = False
1031
- if raw_field and getattr(raw_field, "path", None):
1032
- try:
1033
- raw_exists = Path(raw_field.path).exists()
1034
- except (ValueError, OSError):
1035
- raw_exists = False
1036
-
1037
1124
  video_processing_complete = (
1038
- video.sensitive_meta is not None and
1039
- video.video_meta is not None and
1040
- raw_exists
1125
+ self.current_video.sensitive_meta is not None and
1126
+ self.current_video.video_meta is not None and
1127
+ self.current_video.raw_file and
1128
+ hasattr(self.current_video.raw_file, 'path') and
1129
+ Path(self.current_video.raw_file.path).exists()
1041
1130
  )
1042
-
1131
+
1043
1132
  if video_processing_complete:
1044
- self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
1045
-
1133
+ self.logger.info(f"Video {self.current_video.uuid} processing completed successfully - ready for validation")
1134
+
1046
1135
  # Update completion flags if they exist
1047
1136
  completion_fields = []
1048
1137
  for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
1049
- if hasattr(video, field_name):
1050
- setattr(video, field_name, True)
1138
+ if hasattr(self.current_video, field_name):
1139
+ setattr(self.current_video, field_name, True)
1051
1140
  completion_fields.append(field_name)
1052
1141
 
1053
1142
  if completion_fields:
1054
- video.save(update_fields=completion_fields)
1055
- self.logger.info("Updated completion flags: %s", completion_fields)
1143
+ self.current_video.save(update_fields=completion_fields)
1144
+ self.logger.info(f"Updated completion flags: {completion_fields}")
1056
1145
  else:
1057
- self.logger.warning(
1058
- "Video %s processing incomplete - missing required components",
1059
- video.uuid,
1060
- )
1146
+ self.logger.warning(f"Video {self.current_video.uuid} processing incomplete - missing required components")
1061
1147
 
1062
1148
  except Exception as e:
1063
1149
  self.logger.warning(f"Failed to signal completion status: {e}")
@@ -1100,9 +1186,6 @@ class VideoImportService():
1100
1186
  self.processed_files.remove(file_path_str)
1101
1187
  self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
1102
1188
 
1103
-
1104
-
1105
-
1106
1189
  except Exception as e:
1107
1190
  self.logger.warning(f"Error during context cleanup: {e}")
1108
1191
  finally:
@@ -1117,7 +1200,7 @@ def import_and_anonymize(
1117
1200
  processor_name: str,
1118
1201
  save_video: bool = True,
1119
1202
  delete_source: bool = False,
1120
- ) -> VideoFile | None:
1203
+ ) -> "VideoFile":
1121
1204
  """Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
1122
1205
  Kept for backward compatibility with callers that import this function directly.
1123
1206
  """