endoreg-db 0.8.2.4__py3-none-any.whl → 0.8.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/helpers/default_objects.py +48 -29
- endoreg_db/management/commands/import_video.py +5 -4
- endoreg_db/models/__init__.py +4 -4
- endoreg_db/models/media/__init__.py +3 -1
- endoreg_db/models/media/video/__init__.py +4 -0
- endoreg_db/models/media/video/video_file.py +53 -44
- endoreg_db/models/{video_metadata.py → media/video/video_metadata.py} +1 -2
- endoreg_db/models/{video_processing.py → media/video/video_processing.py} +1 -2
- endoreg_db/models/medical/hardware/endoscopy_processor.py +28 -10
- endoreg_db/models/metadata/sensitive_meta.py +8 -8
- endoreg_db/serializers/video/video_metadata.py +1 -1
- endoreg_db/services/pseudonym_service.py +1 -1
- endoreg_db/services/video_import.py +275 -410
- endoreg_db/urls/media.py +1 -9
- endoreg_db/utils/paths.py +15 -16
- endoreg_db/views/__init__.py +1 -13
- endoreg_db/views/video/__init__.py +0 -4
- endoreg_db/views/video/correction.py +20 -177
- {endoreg_db-0.8.2.4.dist-info → endoreg_db-0.8.2.5.dist-info}/METADATA +2 -2
- {endoreg_db-0.8.2.4.dist-info → endoreg_db-0.8.2.5.dist-info}/RECORD +22 -25
- endoreg_db/models/media/video/video_file_meta.py +0 -11
- endoreg_db/services/ollama_api_docs.py +0 -1528
- endoreg_db/views/video/video_reprocess.py +0 -40
- {endoreg_db-0.8.2.4.dist-info → endoreg_db-0.8.2.5.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.2.4.dist-info → endoreg_db-0.8.2.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -16,15 +16,19 @@ import shutil
|
|
|
16
16
|
import time
|
|
17
17
|
from contextlib import contextmanager
|
|
18
18
|
from pathlib import Path
|
|
19
|
-
from typing import Union, Dict, Any, Optional
|
|
19
|
+
from typing import Union, Dict, Any, Optional, List, Tuple
|
|
20
20
|
from django.db import transaction
|
|
21
21
|
from endoreg_db.models import VideoFile, SensitiveMeta
|
|
22
22
|
from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
|
|
23
23
|
import random
|
|
24
24
|
from lx_anonymizer.ocr import trocr_full_image_ocr
|
|
25
25
|
from endoreg_db.utils.hashs import get_video_hash
|
|
26
|
-
from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
|
|
26
|
+
from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
|
|
27
|
+
from typing import TYPE_CHECKING
|
|
28
|
+
from django.db.models.fields.files import FieldFile
|
|
27
29
|
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from endoreg_db.models import EndoscopyProcessor
|
|
28
32
|
|
|
29
33
|
# File lock configuration (matches PDF import)
|
|
30
34
|
STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
|
|
@@ -59,12 +63,18 @@ class VideoImportService():
|
|
|
59
63
|
self.STORAGE_DIR = STORAGE_DIR
|
|
60
64
|
|
|
61
65
|
# Central video instance and processing context
|
|
62
|
-
self.current_video = None
|
|
66
|
+
self.current_video: Optional[VideoFile] = None
|
|
63
67
|
self.processing_context: Dict[str, Any] = {}
|
|
64
68
|
|
|
65
69
|
self.delete_source = False
|
|
66
70
|
|
|
67
71
|
self.logger = logging.getLogger(__name__)
|
|
72
|
+
|
|
73
|
+
def _require_current_video(self) -> VideoFile:
|
|
74
|
+
"""Return the current VideoFile or raise if it has not been initialized."""
|
|
75
|
+
if self.current_video is None:
|
|
76
|
+
raise RuntimeError("Current video instance is not set")
|
|
77
|
+
return self.current_video
|
|
68
78
|
|
|
69
79
|
@contextmanager
|
|
70
80
|
def _file_lock(self, path: Path):
|
|
@@ -156,6 +166,9 @@ class VideoImportService():
|
|
|
156
166
|
return None
|
|
157
167
|
raise
|
|
158
168
|
|
|
169
|
+
# Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
|
|
170
|
+
self._create_sensitive_file()
|
|
171
|
+
|
|
159
172
|
# Create or retrieve video instance
|
|
160
173
|
self._create_or_retrieve_video_instance()
|
|
161
174
|
|
|
@@ -355,27 +368,25 @@ class VideoImportService():
|
|
|
355
368
|
|
|
356
369
|
def _setup_processing_environment(self):
|
|
357
370
|
"""Setup the processing environment without file movement."""
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
raise RuntimeError("No video instance available for processing environment setup")
|
|
361
|
-
|
|
371
|
+
video = self._require_current_video()
|
|
372
|
+
|
|
362
373
|
# Initialize video specifications
|
|
363
|
-
|
|
364
|
-
|
|
374
|
+
video.initialize_video_specs()
|
|
375
|
+
|
|
365
376
|
# Initialize frame objects in database
|
|
366
|
-
|
|
377
|
+
video.initialize_frames()
|
|
367
378
|
|
|
368
379
|
# Extract frames BEFORE processing to prevent pipeline 1 conflicts
|
|
369
380
|
self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
|
|
370
381
|
try:
|
|
371
|
-
frames_extracted =
|
|
382
|
+
frames_extracted = video.extract_frames(overwrite=False)
|
|
372
383
|
if frames_extracted:
|
|
373
384
|
self.processing_context['frames_extracted'] = True
|
|
374
385
|
self.logger.info("Frame extraction completed successfully")
|
|
375
386
|
|
|
376
387
|
# CRITICAL: Immediately save the frames_extracted state to database
|
|
377
388
|
# to prevent refresh_from_db() in pipeline 1 from overriding it
|
|
378
|
-
state =
|
|
389
|
+
state = video.get_or_create_state()
|
|
379
390
|
if not state.frames_extracted:
|
|
380
391
|
state.frames_extracted = True
|
|
381
392
|
state.save(update_fields=['frames_extracted'])
|
|
@@ -388,7 +399,7 @@ class VideoImportService():
|
|
|
388
399
|
self.processing_context['frames_extracted'] = False
|
|
389
400
|
|
|
390
401
|
# Ensure default patient data
|
|
391
|
-
self._ensure_default_patient_data()
|
|
402
|
+
self._ensure_default_patient_data(video_instance=video)
|
|
392
403
|
|
|
393
404
|
self.logger.info("Processing environment setup completed")
|
|
394
405
|
|
|
@@ -396,11 +407,12 @@ class VideoImportService():
|
|
|
396
407
|
"""Process frames and extract metadata with anonymization."""
|
|
397
408
|
# Check frame cleaning availability
|
|
398
409
|
frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
410
|
+
video = self._require_current_video()
|
|
411
|
+
|
|
412
|
+
raw_file_field = video.raw_file
|
|
413
|
+
has_raw_file = isinstance(raw_file_field, FieldFile) and bool(raw_file_field.name)
|
|
402
414
|
|
|
403
|
-
if not (frame_cleaning_available and
|
|
415
|
+
if not (frame_cleaning_available and has_raw_file):
|
|
404
416
|
self.logger.warning("Frame cleaning not available or conditions not met, using fallback anonymization.")
|
|
405
417
|
self._fallback_anonymize_video()
|
|
406
418
|
return
|
|
@@ -409,13 +421,13 @@ class VideoImportService():
|
|
|
409
421
|
self.logger.info("Starting frame-level anonymization with processor ROI masking...")
|
|
410
422
|
|
|
411
423
|
# Get processor ROI information
|
|
412
|
-
|
|
424
|
+
endoscope_data_roi_nested, endoscope_image_roi = self._get_processor_roi_info()
|
|
413
425
|
|
|
414
426
|
# Perform frame cleaning with timeout to prevent blocking
|
|
415
427
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
|
416
428
|
|
|
417
429
|
with ThreadPoolExecutor(max_workers=1) as executor:
|
|
418
|
-
future = executor.submit(self._perform_frame_cleaning, FrameCleaner,
|
|
430
|
+
future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
|
|
419
431
|
try:
|
|
420
432
|
# Increased timeout to better accommodate ffmpeg + OCR
|
|
421
433
|
future.result(timeout=300)
|
|
@@ -427,14 +439,20 @@ class VideoImportService():
|
|
|
427
439
|
raw_video_path = self.processing_context.get('raw_video_path')
|
|
428
440
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
|
|
429
441
|
grace_seconds = 60
|
|
430
|
-
|
|
442
|
+
expected_cleaned_path: Optional[Path] = None
|
|
443
|
+
processed_field = video.processed_file
|
|
444
|
+
if isinstance(processed_field, FieldFile) and processed_field.name:
|
|
445
|
+
try:
|
|
446
|
+
expected_cleaned_path = Path(processed_field.path)
|
|
447
|
+
except (NotImplementedError, TypeError, ValueError):
|
|
448
|
+
expected_cleaned_path = None
|
|
431
449
|
found = False
|
|
432
|
-
if
|
|
450
|
+
if expected_cleaned_path is not None:
|
|
433
451
|
for _ in range(grace_seconds):
|
|
434
|
-
if
|
|
435
|
-
self.processing_context['cleaned_video_path'] =
|
|
452
|
+
if expected_cleaned_path.exists():
|
|
453
|
+
self.processing_context['cleaned_video_path'] = expected_cleaned_path
|
|
436
454
|
self.processing_context['anonymization_completed'] = True
|
|
437
|
-
self.logger.info("Detected cleaned video during grace period: %s",
|
|
455
|
+
self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
|
|
438
456
|
found = True
|
|
439
457
|
break
|
|
440
458
|
time.sleep(1)
|
|
@@ -442,7 +460,7 @@ class VideoImportService():
|
|
|
442
460
|
self._fallback_anonymize_video()
|
|
443
461
|
if not found:
|
|
444
462
|
raise TimeoutError("Frame cleaning operation timed out - likely Ollama connection issue")
|
|
445
|
-
|
|
463
|
+
|
|
446
464
|
except Exception as e:
|
|
447
465
|
self.logger.warning("Frame cleaning failed (reason: %s), falling back to simple copy", e)
|
|
448
466
|
# Try fallback anonymization when frame cleaning fails
|
|
@@ -455,17 +473,20 @@ class VideoImportService():
|
|
|
455
473
|
self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
|
|
456
474
|
|
|
457
475
|
def _save_anonymized_video(self):
|
|
458
|
-
|
|
459
|
-
|
|
476
|
+
video = self._require_current_video()
|
|
477
|
+
anonymized_video_path = video.get_target_anonymized_video_path()
|
|
478
|
+
|
|
460
479
|
if not anonymized_video_path.exists():
|
|
461
|
-
raise RuntimeError(f"Processed video file not found after assembly for {
|
|
480
|
+
raise RuntimeError(f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}")
|
|
462
481
|
|
|
463
482
|
new_processed_hash = get_video_hash(anonymized_video_path)
|
|
464
|
-
if
|
|
465
|
-
raise ValueError(
|
|
483
|
+
if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
|
|
484
|
+
raise ValueError(
|
|
485
|
+
f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
|
|
486
|
+
)
|
|
466
487
|
|
|
467
|
-
|
|
468
|
-
|
|
488
|
+
video.processed_video_hash = new_processed_hash
|
|
489
|
+
video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
|
|
469
490
|
|
|
470
491
|
update_fields = [
|
|
471
492
|
"processed_video_hash",
|
|
@@ -474,22 +495,23 @@ class VideoImportService():
|
|
|
474
495
|
]
|
|
475
496
|
|
|
476
497
|
if self.delete_source:
|
|
477
|
-
original_raw_file_path_to_delete =
|
|
478
|
-
original_raw_frame_dir_to_delete =
|
|
498
|
+
original_raw_file_path_to_delete = video.get_raw_file_path()
|
|
499
|
+
original_raw_frame_dir_to_delete = video.get_frame_dir_path()
|
|
479
500
|
|
|
480
|
-
|
|
501
|
+
video.raw_file.name = None # type: ignore[assignment]
|
|
481
502
|
|
|
482
503
|
update_fields.extend(["raw_file", "video_hash"])
|
|
483
504
|
|
|
484
505
|
transaction.on_commit(lambda: _cleanup_raw_assets(
|
|
485
|
-
video_uuid=
|
|
506
|
+
video_uuid=video.uuid,
|
|
486
507
|
raw_file_path=original_raw_file_path_to_delete,
|
|
487
508
|
raw_frame_dir=original_raw_frame_dir_to_delete
|
|
488
509
|
))
|
|
489
510
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
511
|
+
video.save(update_fields=update_fields)
|
|
512
|
+
video.state.mark_anonymized(save=True)
|
|
513
|
+
video.refresh_from_db()
|
|
514
|
+
self.current_video = video
|
|
493
515
|
return True
|
|
494
516
|
|
|
495
517
|
def _fallback_anonymize_video(self):
|
|
@@ -498,23 +520,23 @@ class VideoImportService():
|
|
|
498
520
|
"""
|
|
499
521
|
try:
|
|
500
522
|
self.logger.info("Attempting fallback video anonymization...")
|
|
501
|
-
|
|
523
|
+
video = self.current_video
|
|
524
|
+
if video is None:
|
|
525
|
+
self.logger.warning("No VideoFile instance available for fallback anonymization")
|
|
526
|
+
else:
|
|
502
527
|
# Try VideoFile.pipe_2() method if available
|
|
503
|
-
if hasattr(
|
|
528
|
+
if hasattr(video, 'pipe_2'):
|
|
504
529
|
self.logger.info("Trying VideoFile.pipe_2() method...")
|
|
505
|
-
if
|
|
530
|
+
if video.pipe_2():
|
|
506
531
|
self.logger.info("VideoFile.pipe_2() succeeded")
|
|
507
532
|
self.processing_context['anonymization_completed'] = True
|
|
508
533
|
return
|
|
509
|
-
|
|
510
|
-
self.logger.warning("VideoFile.pipe_2() returned False")
|
|
534
|
+
self.logger.warning("VideoFile.pipe_2() returned False")
|
|
511
535
|
# Try direct anonymization via _anonymize
|
|
512
|
-
if _anonymize(
|
|
536
|
+
if _anonymize(video, delete_original_raw=self.delete_source):
|
|
513
537
|
self.logger.info("VideoFile._anonymize() succeeded")
|
|
514
538
|
self.processing_context['anonymization_completed'] = True
|
|
515
539
|
return
|
|
516
|
-
else:
|
|
517
|
-
self.logger.warning("No VideoFile instance available for fallback anonymization")
|
|
518
540
|
|
|
519
541
|
# Strategy 2: Simple copy (no processing, just copy raw to processed)
|
|
520
542
|
self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
|
|
@@ -524,33 +546,19 @@ class VideoImportService():
|
|
|
524
546
|
except Exception as e:
|
|
525
547
|
self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
|
|
526
548
|
self.processing_context['anonymization_completed'] = False
|
|
527
|
-
self.processing_context['error_reason']
|
|
549
|
+
self.processing_context['error_reason'] = str(e)
|
|
528
550
|
def _finalize_processing(self):
|
|
529
551
|
"""Finalize processing and update video state."""
|
|
530
552
|
self.logger.info("Updating video processing state...")
|
|
531
553
|
|
|
532
554
|
with transaction.atomic():
|
|
533
|
-
|
|
534
|
-
|
|
555
|
+
video = self._require_current_video()
|
|
556
|
+
try:
|
|
557
|
+
video.refresh_from_db()
|
|
558
|
+
except Exception as refresh_error:
|
|
559
|
+
self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
|
|
535
560
|
|
|
536
|
-
|
|
537
|
-
try:
|
|
538
|
-
self.current_video.refresh_from_db()
|
|
539
|
-
except Exception as e:
|
|
540
|
-
self.logger.error(f"Failed to refresh current_video from DB: {e}")
|
|
541
|
-
if not self.current_video:
|
|
542
|
-
raise RuntimeError("No current video instance available for finalization")
|
|
543
|
-
|
|
544
|
-
if not self.current_video.processed_file:
|
|
545
|
-
self.logger.warning("No processed file available for current video")
|
|
546
|
-
self.current_video.processed_file = None # Ensure field is not None
|
|
547
|
-
self.current_video.mark_sensitive_meta_processed = False
|
|
548
|
-
else:
|
|
549
|
-
self.current_video.mark_sensitive_meta_processed = True
|
|
550
|
-
|
|
551
|
-
state = self.current_video.get_or_create_state()
|
|
552
|
-
if not state:
|
|
553
|
-
raise RuntimeError("Failed to get or create video state")
|
|
561
|
+
state = video.get_or_create_state()
|
|
554
562
|
|
|
555
563
|
# Only mark frames as extracted if they were successfully extracted
|
|
556
564
|
if self.processing_context.get('frames_extracted', False):
|
|
@@ -579,10 +587,7 @@ class VideoImportService():
|
|
|
579
587
|
|
|
580
588
|
# Save all state changes
|
|
581
589
|
state.save()
|
|
582
|
-
self.logger.info("Video processing state updated")
|
|
583
|
-
# Save all state changes
|
|
584
|
-
self.current_video.state.save()
|
|
585
|
-
self.current_video.save()
|
|
590
|
+
self.logger.info("Video processing state updated")
|
|
586
591
|
|
|
587
592
|
# Signal completion
|
|
588
593
|
self._signal_completion()
|
|
@@ -590,59 +595,48 @@ class VideoImportService():
|
|
|
590
595
|
def _cleanup_and_archive(self):
|
|
591
596
|
"""Move processed video to anonym_videos and cleanup."""
|
|
592
597
|
from endoreg_db.utils import data_paths
|
|
593
|
-
|
|
594
|
-
# Define target directory for processed videos
|
|
598
|
+
|
|
595
599
|
anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
|
|
596
600
|
anonym_videos_dir.mkdir(parents=True, exist_ok=True)
|
|
597
|
-
|
|
598
|
-
|
|
601
|
+
|
|
602
|
+
video = self._require_current_video()
|
|
603
|
+
|
|
599
604
|
processed_video_path = None
|
|
600
|
-
|
|
601
|
-
# Look for cleaned video from frame cleaning process
|
|
602
605
|
if 'cleaned_video_path' in self.processing_context:
|
|
603
606
|
processed_video_path = self.processing_context['cleaned_video_path']
|
|
604
607
|
else:
|
|
605
|
-
# If no processing occurred, copy from raw video location
|
|
606
608
|
raw_video_path = self.processing_context.get('raw_video_path')
|
|
607
609
|
if raw_video_path and Path(raw_video_path).exists():
|
|
608
610
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
609
611
|
processed_filename = f"processed_{video_filename}"
|
|
610
612
|
processed_video_path = Path(raw_video_path).parent / processed_filename
|
|
611
|
-
|
|
612
|
-
# Copy raw to processed location (will be moved to anonym_videos)
|
|
613
613
|
try:
|
|
614
614
|
shutil.copy2(str(raw_video_path), str(processed_video_path))
|
|
615
615
|
self.logger.info("Copied raw video for processing: %s", processed_video_path)
|
|
616
|
-
except Exception as
|
|
617
|
-
self.logger.error("Failed to copy raw video: %s",
|
|
618
|
-
processed_video_path = None
|
|
619
|
-
|
|
620
|
-
# Move processed video to anonym_videos ONLY if it exists
|
|
616
|
+
except Exception as exc:
|
|
617
|
+
self.logger.error("Failed to copy raw video: %s", exc)
|
|
618
|
+
processed_video_path = None
|
|
619
|
+
|
|
621
620
|
if processed_video_path and Path(processed_video_path).exists():
|
|
622
621
|
try:
|
|
623
|
-
# ✅ Clean filename: no original filename leakage
|
|
624
622
|
ext = Path(processed_video_path).suffix or ".mp4"
|
|
625
|
-
anonym_video_filename = f"anonym_{
|
|
623
|
+
anonym_video_filename = f"anonym_{video.uuid}{ext}"
|
|
626
624
|
anonym_target_path = anonym_videos_dir / anonym_video_filename
|
|
627
625
|
|
|
628
|
-
# Move processed video to anonym_videos/
|
|
629
626
|
shutil.move(str(processed_video_path), str(anonym_target_path))
|
|
630
627
|
self.logger.info("Moved processed video to: %s", anonym_target_path)
|
|
631
628
|
|
|
632
|
-
# Verify the file actually exists before updating database
|
|
633
629
|
if anonym_target_path.exists():
|
|
634
630
|
try:
|
|
635
631
|
storage_root = data_paths["storage"]
|
|
636
632
|
relative_path = anonym_target_path.relative_to(storage_root)
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
self.current_video.save(update_fields=["processed_file"])
|
|
633
|
+
video.processed_file.name = str(relative_path)
|
|
634
|
+
video.save(update_fields=["processed_file"])
|
|
640
635
|
self.logger.info("Updated processed_file path to: %s", relative_path)
|
|
641
|
-
except Exception as
|
|
642
|
-
self.logger.error("Failed to update processed_file path: %s",
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
self.current_video.save(update_fields=['processed_file'])
|
|
636
|
+
except Exception as exc:
|
|
637
|
+
self.logger.error("Failed to update processed_file path: %s", exc)
|
|
638
|
+
video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
|
|
639
|
+
video.save(update_fields=['processed_file'])
|
|
646
640
|
self.logger.info(
|
|
647
641
|
"Updated processed_file path using fallback: %s",
|
|
648
642
|
f"anonym_videos/{anonym_video_filename}",
|
|
@@ -651,277 +645,185 @@ class VideoImportService():
|
|
|
651
645
|
self.processing_context['anonymization_completed'] = True
|
|
652
646
|
else:
|
|
653
647
|
self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
|
|
654
|
-
except Exception as
|
|
655
|
-
self.logger.error("Failed to move processed video to anonym_videos: %s",
|
|
648
|
+
except Exception as exc:
|
|
649
|
+
self.logger.error("Failed to move processed video to anonym_videos: %s", exc)
|
|
656
650
|
else:
|
|
657
651
|
self.logger.warning("No processed video available - processed_file will remain empty")
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
# Cleanup temporary directories
|
|
652
|
+
|
|
661
653
|
try:
|
|
662
654
|
from endoreg_db.utils.paths import RAW_FRAME_DIR
|
|
663
655
|
shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
|
|
664
656
|
self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
|
|
665
|
-
except Exception as
|
|
666
|
-
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR,
|
|
667
|
-
|
|
668
|
-
# Handle source file deletion - this should already be moved, but check raw_videos
|
|
657
|
+
except Exception as exc:
|
|
658
|
+
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
|
|
659
|
+
|
|
669
660
|
source_path = self.processing_context['file_path']
|
|
670
661
|
if self.processing_context['delete_source'] and Path(source_path).exists():
|
|
671
662
|
try:
|
|
672
663
|
os.remove(source_path)
|
|
673
664
|
self.logger.info("Removed remaining source file: %s", source_path)
|
|
674
|
-
except Exception as
|
|
675
|
-
self.logger.warning("Failed to remove source file %s: %s", source_path,
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
if not self.current_video.processed_file or not Path(self.current_video.processed_file.path).exists():
|
|
665
|
+
except Exception as exc:
|
|
666
|
+
self.logger.warning("Failed to remove source file %s: %s", source_path, exc)
|
|
667
|
+
|
|
668
|
+
if not video.processed_file or not Path(video.processed_file.path).exists():
|
|
680
669
|
self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
670
|
+
try:
|
|
671
|
+
video.anonymize(delete_original_raw=self.delete_source)
|
|
672
|
+
video.save(update_fields=['processed_file'])
|
|
673
|
+
self.logger.info("Late-stage anonymization succeeded")
|
|
674
|
+
except Exception as e:
|
|
675
|
+
self.logger.error("Late-stage anonymization failed: %s", e)
|
|
676
|
+
self.processing_context['anonymization_completed'] = False
|
|
677
|
+
|
|
685
678
|
self.logger.info("Cleanup and archiving completed")
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
# Mark as processed (in-memory tracking)
|
|
679
|
+
|
|
690
680
|
self.processed_files.add(str(self.processing_context['file_path']))
|
|
691
|
-
|
|
692
|
-
# Refresh from database and finalize state
|
|
681
|
+
|
|
693
682
|
with transaction.atomic():
|
|
694
|
-
|
|
695
|
-
if hasattr(
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
self.logger.info("Import and anonymization completed for VideoFile UUID: %s", self.current_video.uuid)
|
|
683
|
+
video.refresh_from_db()
|
|
684
|
+
if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
|
|
685
|
+
video.state.mark_sensitive_meta_processed(save=True)
|
|
686
|
+
|
|
687
|
+
self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
|
|
700
688
|
self.logger.info("Raw video stored in: /data/videos")
|
|
701
689
|
self.logger.info("Processed video stored in: /data/anonym_videos")
|
|
702
690
|
|
|
703
|
-
def _create_sensitive_file(
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
691
|
+
def _create_sensitive_file(
|
|
692
|
+
self,
|
|
693
|
+
video_instance: VideoFile | None = None,
|
|
694
|
+
file_path: Path | str | None = None,
|
|
695
|
+
) -> Path:
|
|
696
|
+
"""Create or move a sensitive copy of the raw video file inside storage."""
|
|
707
697
|
|
|
708
|
-
|
|
709
|
-
video_instance: Optional video instance, defaults to self.current_video
|
|
710
|
-
file_path: Optional file path, defaults to processing_context['file_path']
|
|
698
|
+
video = video_instance or self._require_current_video()
|
|
711
699
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
"""
|
|
715
|
-
video_file = video_instance or self.current_video
|
|
716
|
-
# Always use the currently stored raw file path from the model to avoid deleting external source assets
|
|
717
|
-
source_path = None
|
|
700
|
+
raw_field: FieldFile | None = getattr(video, "raw_file", None)
|
|
701
|
+
source_path: Path | None = None
|
|
718
702
|
try:
|
|
719
|
-
if
|
|
720
|
-
source_path = Path(
|
|
703
|
+
if raw_field and raw_field.path:
|
|
704
|
+
source_path = Path(raw_field.path)
|
|
721
705
|
except Exception:
|
|
722
706
|
source_path = None
|
|
723
|
-
|
|
707
|
+
|
|
724
708
|
if source_path is None and file_path is not None:
|
|
725
709
|
source_path = Path(file_path)
|
|
726
|
-
|
|
727
|
-
if
|
|
728
|
-
raise ValueError("No video instance available for creating sensitive file")
|
|
729
|
-
if not source_path:
|
|
710
|
+
|
|
711
|
+
if source_path is None:
|
|
730
712
|
raise ValueError("No file path available for creating sensitive file")
|
|
731
|
-
|
|
732
|
-
if not video_file.raw_file:
|
|
713
|
+
if not raw_field:
|
|
733
714
|
raise ValueError("VideoFile must have a raw_file to create a sensitive file")
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
target_dir = VIDEO_DIR / 'sensitive'
|
|
715
|
+
|
|
716
|
+
target_dir = VIDEO_DIR / "sensitive"
|
|
737
717
|
if not target_dir.exists():
|
|
738
|
-
self.logger.info(
|
|
718
|
+
self.logger.info("Creating sensitive file directory: %s", target_dir)
|
|
739
719
|
os.makedirs(target_dir, exist_ok=True)
|
|
740
|
-
|
|
741
|
-
# Move the stored raw file into the sensitive directory within storage
|
|
720
|
+
|
|
742
721
|
target_file_path = target_dir / source_path.name
|
|
743
722
|
try:
|
|
744
|
-
# Prefer a move within the storage to avoid extra disk usage. This does not touch external input files.
|
|
745
723
|
shutil.move(str(source_path), str(target_file_path))
|
|
746
|
-
self.logger.info(
|
|
747
|
-
except Exception as
|
|
748
|
-
|
|
749
|
-
self.logger.warning(f"Failed to move raw file to sensitive dir, copying instead: {e}")
|
|
724
|
+
self.logger.info("Moved raw file to sensitive directory: %s", target_file_path)
|
|
725
|
+
except Exception as exc:
|
|
726
|
+
self.logger.warning("Failed to move raw file to sensitive dir, copying instead: %s", exc)
|
|
750
727
|
shutil.copy(str(source_path), str(target_file_path))
|
|
751
728
|
try:
|
|
752
|
-
# Remove only the stored raw file copy; never touch external input paths here
|
|
753
729
|
os.remove(source_path)
|
|
754
730
|
except FileNotFoundError:
|
|
755
731
|
pass
|
|
756
|
-
|
|
757
|
-
# Update the model to point to the sensitive file location
|
|
758
|
-
# Use relative path from storage root, like in create_from_file.py
|
|
732
|
+
|
|
759
733
|
try:
|
|
760
734
|
from endoreg_db.utils import data_paths
|
|
735
|
+
|
|
761
736
|
storage_root = data_paths["storage"]
|
|
762
737
|
relative_path = target_file_path.relative_to(storage_root)
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
self.logger.info(
|
|
766
|
-
except Exception as
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
# Source file cleanup for external inputs is handled by create_from_file via delete_source flag.
|
|
775
|
-
|
|
776
|
-
self.logger.info(f"Created sensitive file for {video_file.uuid} at {target_file_path}")
|
|
777
|
-
return target_file_path
|
|
738
|
+
video.raw_file.name = str(relative_path)
|
|
739
|
+
video.save(update_fields=["raw_file"])
|
|
740
|
+
self.logger.info("Updated video.raw_file to point to sensitive location: %s", relative_path)
|
|
741
|
+
except Exception as exc:
|
|
742
|
+
self.logger.warning("Failed to set relative path, using fallback: %s", exc)
|
|
743
|
+
video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
|
|
744
|
+
video.save(update_fields=["raw_file"])
|
|
745
|
+
self.logger.info(
|
|
746
|
+
"Updated video.raw_file using fallback method: videos/sensitive/%s",
|
|
747
|
+
target_file_path.name,
|
|
748
|
+
)
|
|
778
749
|
|
|
750
|
+
self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
|
|
751
|
+
return target_file_path
|
|
779
752
|
|
|
753
|
+
def _get_processor_roi_info(self) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
|
|
754
|
+
"""Get processor ROI information for masking."""
|
|
755
|
+
endoscope_data_roi_nested = None
|
|
756
|
+
endoscope_image_roi = None
|
|
780
757
|
|
|
758
|
+
video = self._require_current_video()
|
|
781
759
|
|
|
782
|
-
def _ensure_frame_cleaning_available(self):
|
|
783
|
-
"""
|
|
784
|
-
Ensure frame cleaning modules are available by adding lx-anonymizer to path.
|
|
785
|
-
|
|
786
|
-
Returns:
|
|
787
|
-
Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
|
|
788
|
-
"""
|
|
789
760
|
try:
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
sys.path.insert(0, str(lx_anonymizer_path))
|
|
798
|
-
|
|
799
|
-
# Try simple import
|
|
800
|
-
from lx_anonymizer import FrameCleaner, ReportReader
|
|
801
|
-
|
|
802
|
-
self.logger.info("Successfully imported lx_anonymizer modules")
|
|
803
|
-
|
|
804
|
-
# Remove from path to avoid conflicts
|
|
805
|
-
if str(lx_anonymizer_path) in sys.path:
|
|
806
|
-
sys.path.remove(str(lx_anonymizer_path))
|
|
807
|
-
|
|
808
|
-
return True, FrameCleaner, ReportReader
|
|
809
|
-
|
|
761
|
+
video_meta = getattr(video, "video_meta", None)
|
|
762
|
+
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
763
|
+
if processor:
|
|
764
|
+
assert isinstance(processor, EndoscopyProcessor), "Processor is not of type EndoscopyProcessor"
|
|
765
|
+
endoscope_image_roi = processor.get_roi_endoscope_image()
|
|
766
|
+
endoscope_data_roi_nested = processor.get_rois()
|
|
767
|
+
self.logger.info("Retrieved processor ROI information: endoscope_image_roi=%s", endoscope_image_roi)
|
|
810
768
|
else:
|
|
811
|
-
self.logger.warning(
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
769
|
+
self.logger.warning(
|
|
770
|
+
"No processor found for video %s, proceeding without ROI masking",
|
|
771
|
+
video.uuid,
|
|
772
|
+
)
|
|
773
|
+
except Exception as exc:
|
|
774
|
+
self.logger.error("Failed to retrieve processor ROI information: %s", exc)
|
|
817
775
|
|
|
818
|
-
|
|
819
|
-
"""Get processor ROI information for masking."""
|
|
820
|
-
processor_roi = None
|
|
821
|
-
endoscope_roi = None
|
|
822
|
-
|
|
823
|
-
try:
|
|
824
|
-
if self.current_video.video_meta and self.current_video.video_meta.processor:
|
|
825
|
-
processor = getattr(self.current_video.video_meta, "processor", None)
|
|
826
|
-
|
|
827
|
-
# Get the endoscope ROI for masking
|
|
828
|
-
endoscope_roi = processor.get_roi_endoscope_image()
|
|
829
|
-
|
|
830
|
-
# Get all processor ROIs for comprehensive masking
|
|
831
|
-
processor_roi = {
|
|
832
|
-
'endoscope_image': endoscope_roi,
|
|
833
|
-
'patient_first_name': processor.get_roi_patient_first_name(),
|
|
834
|
-
'patient_last_name': processor.get_roi_patient_last_name(),
|
|
835
|
-
'patient_dob': processor.get_roi_patient_dob(),
|
|
836
|
-
'examination_date': processor.get_roi_examination_date(),
|
|
837
|
-
'examination_time': processor.get_roi_examination_time(),
|
|
838
|
-
'endoscope_type': processor.get_roi_endoscope_type(),
|
|
839
|
-
'endoscopy_sn': processor.get_roi_endoscopy_sn(),
|
|
840
|
-
}
|
|
841
|
-
|
|
842
|
-
self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
|
|
843
|
-
else:
|
|
844
|
-
self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
|
|
845
|
-
|
|
846
|
-
except Exception as e:
|
|
847
|
-
self.logger.error(f"Failed to retrieve processor ROI information: {e}")
|
|
848
|
-
# Continue without ROI - don't fail the entire import process
|
|
849
|
-
|
|
850
|
-
return processor_roi, endoscope_roi
|
|
776
|
+
return endoscope_data_roi_nested, endoscope_image_roi
|
|
851
777
|
|
|
778
|
+
def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
|
|
779
|
+
"""Ensure minimum patient data is present on the video's SensitiveMeta."""
|
|
852
780
|
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
Args:
|
|
860
|
-
video_instance: Optional video instance, defaults to self.current_video
|
|
861
|
-
"""
|
|
862
|
-
video_file = video_instance or self.current_video
|
|
863
|
-
|
|
864
|
-
if not video_file:
|
|
865
|
-
raise ValueError("No video instance available for ensuring patient data")
|
|
866
|
-
|
|
867
|
-
if not video_file.sensitive_meta:
|
|
868
|
-
self.logger.info(f"No SensitiveMeta found for video {video_file.uuid}, creating default")
|
|
869
|
-
|
|
870
|
-
# Create default SensitiveMeta with placeholder data
|
|
781
|
+
video = video_instance or self._require_current_video()
|
|
782
|
+
|
|
783
|
+
sensitive_meta = getattr(video, "sensitive_meta", None)
|
|
784
|
+
if not sensitive_meta:
|
|
785
|
+
self.logger.info("No SensitiveMeta found for video %s, creating default", video.uuid)
|
|
871
786
|
default_data = {
|
|
872
787
|
"patient_first_name": "Patient",
|
|
873
|
-
"patient_last_name": "Unknown",
|
|
874
|
-
"patient_dob": date(1990, 1, 1),
|
|
788
|
+
"patient_last_name": "Unknown",
|
|
789
|
+
"patient_dob": date(1990, 1, 1),
|
|
875
790
|
"examination_date": date.today(),
|
|
876
|
-
"center_name":
|
|
791
|
+
"center_name": video.center.name if video.center else "university_hospital_wuerzburg",
|
|
877
792
|
}
|
|
878
|
-
|
|
879
793
|
try:
|
|
880
794
|
sensitive_meta = SensitiveMeta.create_from_dict(default_data)
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
# Mark sensitive meta as processed after creating default data
|
|
885
|
-
state = video_file.get_or_create_state()
|
|
795
|
+
video.sensitive_meta = sensitive_meta
|
|
796
|
+
video.save(update_fields=["sensitive_meta"])
|
|
797
|
+
state = video.get_or_create_state()
|
|
886
798
|
state.mark_sensitive_meta_processed(save=True)
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
self.logger.error(f"Failed to create default SensitiveMeta for video {video_file.uuid}: {e}")
|
|
799
|
+
self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
|
|
800
|
+
except Exception as exc:
|
|
801
|
+
self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
|
|
891
802
|
return
|
|
892
|
-
|
|
893
803
|
else:
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
update_data = {}
|
|
897
|
-
|
|
898
|
-
if not video_file.sensitive_meta.patient_first_name:
|
|
804
|
+
update_data: Dict[str, Any] = {}
|
|
805
|
+
if not sensitive_meta.patient_first_name:
|
|
899
806
|
update_data["patient_first_name"] = "Patient"
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
if not video_file.sensitive_meta.patient_last_name:
|
|
807
|
+
if not sensitive_meta.patient_last_name:
|
|
903
808
|
update_data["patient_last_name"] = "Unknown"
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
if not video_file.sensitive_meta.patient_dob:
|
|
809
|
+
if not sensitive_meta.patient_dob:
|
|
907
810
|
update_data["patient_dob"] = date(1990, 1, 1)
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
if not video_file.sensitive_meta.examination_date:
|
|
811
|
+
if not sensitive_meta.examination_date:
|
|
911
812
|
update_data["examination_date"] = date.today()
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
if update_needed:
|
|
813
|
+
|
|
814
|
+
if update_data:
|
|
915
815
|
try:
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
# Mark sensitive meta as processed after updating missing fields
|
|
919
|
-
state = video_file.get_or_create_state()
|
|
816
|
+
sensitive_meta.update_from_dict(update_data)
|
|
817
|
+
state = video.get_or_create_state()
|
|
920
818
|
state.mark_sensitive_meta_processed(save=True)
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
819
|
+
self.logger.info(
|
|
820
|
+
"Updated missing SensitiveMeta fields for video %s: %s",
|
|
821
|
+
video.uuid,
|
|
822
|
+
list(update_data.keys()),
|
|
823
|
+
)
|
|
824
|
+
except Exception as exc:
|
|
825
|
+
self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
|
|
826
|
+
|
|
925
827
|
|
|
926
828
|
|
|
927
829
|
def _ensure_frame_cleaning_available(self):
|
|
@@ -935,6 +837,9 @@ class VideoImportService():
|
|
|
935
837
|
# Check if we can find the lx-anonymizer directory
|
|
936
838
|
from importlib import resources
|
|
937
839
|
lx_anonymizer_path = resources.files("lx_anonymizer")
|
|
840
|
+
|
|
841
|
+
# make sure lx_anonymizer_path is a Path object
|
|
842
|
+
lx_anonymizer_path = Path(str(lx_anonymizer_path))
|
|
938
843
|
|
|
939
844
|
if lx_anonymizer_path.exists():
|
|
940
845
|
# Add to Python path temporarily
|
|
@@ -960,41 +865,9 @@ class VideoImportService():
|
|
|
960
865
|
|
|
961
866
|
return False, None, None
|
|
962
867
|
|
|
963
|
-
|
|
964
|
-
"""Get processor ROI information for masking."""
|
|
965
|
-
processor_roi = None
|
|
966
|
-
endoscope_roi = None
|
|
967
|
-
|
|
968
|
-
try:
|
|
969
|
-
if self.current_video.video_meta and self.current_video.video_meta.processor:
|
|
970
|
-
processor = getattr(self.current_video.video_meta, "processor", None)
|
|
971
|
-
|
|
972
|
-
# Get the endoscope ROI for masking
|
|
973
|
-
endoscope_roi = processor.get_roi_endoscope_image()
|
|
974
|
-
|
|
975
|
-
# Get all processor ROIs for comprehensive masking
|
|
976
|
-
processor_roi = {
|
|
977
|
-
'endoscope_image': endoscope_roi,
|
|
978
|
-
'patient_first_name': processor.get_roi_patient_first_name(),
|
|
979
|
-
'patient_last_name': processor.get_roi_patient_last_name(),
|
|
980
|
-
'patient_dob': processor.get_roi_patient_dob(),
|
|
981
|
-
'examination_date': processor.get_roi_examination_date(),
|
|
982
|
-
'examination_time': processor.get_roi_examination_time(),
|
|
983
|
-
'endoscope_type': processor.get_roi_endoscope_type(),
|
|
984
|
-
'endoscopy_sn': processor.get_roi_endoscopy_sn(),
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
|
|
988
|
-
else:
|
|
989
|
-
self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
|
|
990
|
-
|
|
991
|
-
except Exception as e:
|
|
992
|
-
self.logger.error(f"Failed to retrieve processor ROI information: {e}")
|
|
993
|
-
# Continue without ROI - don't fail the entire import process
|
|
994
|
-
|
|
995
|
-
return processor_roi, endoscope_roi
|
|
868
|
+
|
|
996
869
|
|
|
997
|
-
def _perform_frame_cleaning(self, FrameCleaner,
|
|
870
|
+
def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
|
|
998
871
|
"""Perform frame cleaning and anonymization."""
|
|
999
872
|
# Instantiate frame cleaner
|
|
1000
873
|
frame_cleaner = FrameCleaner()
|
|
@@ -1006,32 +879,35 @@ class VideoImportService():
|
|
|
1006
879
|
raise RuntimeError(f"Raw video path not found: {raw_video_path}")
|
|
1007
880
|
|
|
1008
881
|
# Get processor name safely
|
|
1009
|
-
|
|
882
|
+
video = self._require_current_video()
|
|
883
|
+
video_meta = getattr(video, "video_meta", None)
|
|
884
|
+
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
1010
885
|
device_name = processor.name if processor else self.processing_context['processor_name']
|
|
1011
|
-
|
|
1012
|
-
tmp_dir = RAW_FRAME_DIR
|
|
1013
|
-
|
|
886
|
+
|
|
1014
887
|
# Create temporary output path for cleaned video
|
|
1015
888
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
1016
889
|
cleaned_filename = f"cleaned_{video_filename}"
|
|
1017
890
|
cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
|
|
1018
891
|
|
|
892
|
+
processor_roi, endoscope_roi = self._get_processor_roi_info(video)
|
|
893
|
+
|
|
894
|
+
# Processor roi can be used later to OCR preknown regions.
|
|
895
|
+
|
|
1019
896
|
# Clean video with ROI masking (heavy I/O operation)
|
|
1020
897
|
actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
|
|
1021
|
-
Path(raw_video_path),
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
cleaned_video_path
|
|
898
|
+
video_path=Path(raw_video_path),
|
|
899
|
+
video_file_obj=video,
|
|
900
|
+
endoscope_image_roi=endoscope_image_roi,
|
|
901
|
+
endoscope_data_roi_nested=endoscope_data_roi_nested,
|
|
902
|
+
output_path=cleaned_video_path,
|
|
903
|
+
technique="mask_overlay"
|
|
1028
904
|
)
|
|
1029
905
|
|
|
1030
906
|
# Optional: enrich metadata using TrOCR+LLM on one random extracted frame
|
|
1031
907
|
try:
|
|
1032
908
|
# Prefer frames belonging to this video (UUID in path), else pick any frame
|
|
1033
909
|
frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
|
|
1034
|
-
video_uuid = str(
|
|
910
|
+
video_uuid = str(video.uuid)
|
|
1035
911
|
filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
|
|
1036
912
|
if filtered:
|
|
1037
913
|
sample_frame = random.choice(filtered)
|
|
@@ -1062,88 +938,74 @@ class VideoImportService():
|
|
|
1062
938
|
self.logger.info(f"Frame cleaning with ROI masking completed: {actual_cleaned_path}")
|
|
1063
939
|
self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
|
|
1064
940
|
|
|
1065
|
-
def _update_sensitive_metadata(self, extracted_metadata):
|
|
941
|
+
def _update_sensitive_metadata(self, extracted_metadata: Dict[str, Any]):
|
|
1066
942
|
"""
|
|
1067
943
|
Update sensitive metadata with extracted information.
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
This prevents accidentally overwriting valuable manually entered or previously extracted data.
|
|
944
|
+
Args:
|
|
945
|
+
extracted_metadata (Dict[str, Any]): Extracted metadata to update.
|
|
1071
946
|
"""
|
|
1072
|
-
|
|
947
|
+
video = self._require_current_video()
|
|
948
|
+
sensitive_meta = getattr(video, "sensitive_meta", None)
|
|
949
|
+
|
|
950
|
+
if not (sensitive_meta and extracted_metadata):
|
|
1073
951
|
return
|
|
1074
|
-
|
|
1075
|
-
sm =
|
|
952
|
+
|
|
953
|
+
sm = sensitive_meta
|
|
1076
954
|
updated_fields = []
|
|
1077
955
|
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
'endoscope_type': 'endoscope_type'
|
|
1085
|
-
}
|
|
1086
|
-
|
|
1087
|
-
# Define default/placeholder values that are safe to overwrite
|
|
1088
|
-
SAFE_TO_OVERWRITE_VALUES = [
|
|
1089
|
-
'Vorname unbekannt', # Default first name
|
|
1090
|
-
'Nachname unbekannt', # Default last name
|
|
1091
|
-
date(1990, 1, 1), # Default DOB
|
|
1092
|
-
None, # Empty values
|
|
1093
|
-
'', # Empty strings
|
|
1094
|
-
'N/A', # Placeholder values
|
|
1095
|
-
'Unbekanntes Gerät', # Default device name
|
|
1096
|
-
]
|
|
1097
|
-
|
|
1098
|
-
for meta_key, sm_field in metadata_mapping.items():
|
|
1099
|
-
if extracted_metadata.get(meta_key) and hasattr(sm, sm_field):
|
|
1100
|
-
old_value = getattr(sm, sm_field)
|
|
1101
|
-
new_value = extracted_metadata[meta_key]
|
|
1102
|
-
|
|
1103
|
-
# Enhanced safety check: Only update if current value is safe to overwrite
|
|
1104
|
-
if new_value and (old_value in SAFE_TO_OVERWRITE_VALUES):
|
|
1105
|
-
self.logger.info(f"Updating {sm_field} from '{old_value}' to '{new_value}' for video {self.current_video.uuid}")
|
|
1106
|
-
setattr(sm, sm_field, new_value)
|
|
1107
|
-
updated_fields.append(sm_field)
|
|
1108
|
-
elif new_value and old_value and old_value not in SAFE_TO_OVERWRITE_VALUES:
|
|
1109
|
-
self.logger.info(f"Preserving existing {sm_field} value '{old_value}' (not overwriting with '{new_value}') for video {self.current_video.uuid}")
|
|
1110
|
-
|
|
956
|
+
try:
|
|
957
|
+
sm.update_from_dict(extracted_metadata)
|
|
958
|
+
updated_fields = list(extracted_metadata.keys())
|
|
959
|
+
except KeyError as e:
|
|
960
|
+
self.logger.warning(f"Failed to update SensitiveMeta field {e}")
|
|
961
|
+
|
|
1111
962
|
if updated_fields:
|
|
1112
963
|
sm.save(update_fields=updated_fields)
|
|
1113
|
-
self.logger.info(
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
self.logger.info(
|
|
964
|
+
self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
|
|
965
|
+
|
|
966
|
+
state = video.get_or_create_state()
|
|
967
|
+
state.mark_sensitive_meta_processed(save=True)
|
|
968
|
+
self.logger.info("Marked sensitive metadata as processed for video %s", video.uuid)
|
|
1118
969
|
else:
|
|
1119
|
-
self.logger.info(
|
|
970
|
+
self.logger.info("No SensitiveMeta fields updated for video %s - all existing values preserved", video.uuid)
|
|
1120
971
|
|
|
1121
972
|
def _signal_completion(self):
|
|
1122
973
|
"""Signal completion to the tracking system."""
|
|
1123
974
|
try:
|
|
975
|
+
video = self._require_current_video()
|
|
976
|
+
|
|
977
|
+
raw_field: FieldFile | None = getattr(video, "raw_file", None)
|
|
978
|
+
raw_exists = False
|
|
979
|
+
if raw_field and getattr(raw_field, "path", None):
|
|
980
|
+
try:
|
|
981
|
+
raw_exists = Path(raw_field.path).exists()
|
|
982
|
+
except (ValueError, OSError):
|
|
983
|
+
raw_exists = False
|
|
984
|
+
|
|
1124
985
|
video_processing_complete = (
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
hasattr(self.current_video.raw_file, 'path') and
|
|
1129
|
-
Path(self.current_video.raw_file.path).exists()
|
|
986
|
+
video.sensitive_meta is not None and
|
|
987
|
+
video.video_meta is not None and
|
|
988
|
+
raw_exists
|
|
1130
989
|
)
|
|
1131
|
-
|
|
990
|
+
|
|
1132
991
|
if video_processing_complete:
|
|
1133
|
-
self.logger.info(
|
|
1134
|
-
|
|
992
|
+
self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
|
|
993
|
+
|
|
1135
994
|
# Update completion flags if they exist
|
|
1136
995
|
completion_fields = []
|
|
1137
996
|
for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
|
|
1138
|
-
if hasattr(
|
|
1139
|
-
setattr(
|
|
997
|
+
if hasattr(video, field_name):
|
|
998
|
+
setattr(video, field_name, True)
|
|
1140
999
|
completion_fields.append(field_name)
|
|
1141
1000
|
|
|
1142
1001
|
if completion_fields:
|
|
1143
|
-
|
|
1144
|
-
self.logger.info(
|
|
1002
|
+
video.save(update_fields=completion_fields)
|
|
1003
|
+
self.logger.info("Updated completion flags: %s", completion_fields)
|
|
1145
1004
|
else:
|
|
1146
|
-
self.logger.warning(
|
|
1005
|
+
self.logger.warning(
|
|
1006
|
+
"Video %s processing incomplete - missing required components",
|
|
1007
|
+
video.uuid,
|
|
1008
|
+
)
|
|
1147
1009
|
|
|
1148
1010
|
except Exception as e:
|
|
1149
1011
|
self.logger.warning(f"Failed to signal completion status: {e}")
|
|
@@ -1186,6 +1048,9 @@ class VideoImportService():
|
|
|
1186
1048
|
self.processed_files.remove(file_path_str)
|
|
1187
1049
|
self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
|
|
1188
1050
|
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
|
|
1189
1054
|
except Exception as e:
|
|
1190
1055
|
self.logger.warning(f"Error during context cleanup: {e}")
|
|
1191
1056
|
finally:
|
|
@@ -1200,7 +1065,7 @@ def import_and_anonymize(
|
|
|
1200
1065
|
processor_name: str,
|
|
1201
1066
|
save_video: bool = True,
|
|
1202
1067
|
delete_source: bool = False,
|
|
1203
|
-
) ->
|
|
1068
|
+
) -> VideoFile | None:
|
|
1204
1069
|
"""Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
|
|
1205
1070
|
Kept for backward compatibility with callers that import this function directly.
|
|
1206
1071
|
"""
|