endoreg-db 0.8.2__py3-none-any.whl → 0.8.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/config/__init__.py +0 -0
- endoreg_db/helpers/default_objects.py +48 -29
- endoreg_db/management/commands/import_video.py +5 -3
- endoreg_db/migrations/0003_add_center_display_name.py +30 -0
- endoreg_db/models/administration/center/center.py +7 -1
- endoreg_db/models/media/pdf/raw_pdf.py +31 -26
- endoreg_db/models/media/video/create_from_file.py +26 -4
- endoreg_db/models/media/video/video_file.py +89 -57
- endoreg_db/models/media/video/video_file_anonymize.py +2 -1
- endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +12 -0
- endoreg_db/models/media/video/video_file_io.py +4 -2
- endoreg_db/models/metadata/sensitive_meta.py +6 -6
- endoreg_db/models/metadata/video_meta.py +2 -2
- endoreg_db/services/pdf_import.py +131 -15
- endoreg_db/services/pseudonym_service.py +1 -1
- endoreg_db/services/video_import.py +400 -387
- endoreg_db/urls/sensitive_meta.py +0 -0
- endoreg_db/utils/paths.py +2 -10
- endoreg_db/utils/video/ffmpeg_wrapper.py +67 -4
- endoreg_db/views/anonymization/validate.py +75 -34
- endoreg_db/views/video/correction.py +8 -6
- {endoreg_db-0.8.2.dist-info → endoreg_db-0.8.2.2.dist-info}/METADATA +2 -2
- {endoreg_db-0.8.2.dist-info → endoreg_db-0.8.2.2.dist-info}/RECORD +25 -23
- endoreg_db/services/ollama_api_docs.py +0 -1528
- {endoreg_db-0.8.2.dist-info → endoreg_db-0.8.2.2.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.2.dist-info → endoreg_db-0.8.2.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -18,14 +18,21 @@ from contextlib import contextmanager
|
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
from typing import Union, Dict, Any, Optional
|
|
20
20
|
from django.db import transaction
|
|
21
|
+
from transformers.models.align.convert_align_tf_to_hf import get_processor
|
|
21
22
|
from endoreg_db.models import VideoFile, SensitiveMeta
|
|
22
23
|
from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
|
|
23
24
|
import random
|
|
24
25
|
from lx_anonymizer.ocr import trocr_full_image_ocr
|
|
25
|
-
from
|
|
26
|
+
from endoreg_db.utils.hashs import get_video_hash
|
|
27
|
+
from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
|
|
28
|
+
from typing import TYPE_CHECKING
|
|
29
|
+
from django.db.models.fields.files import FieldFile
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from endoreg_db.models import EndoscopyProcessor
|
|
26
33
|
|
|
27
34
|
# File lock configuration (matches PDF import)
|
|
28
|
-
STALE_LOCK_SECONDS =
|
|
35
|
+
STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
|
|
29
36
|
MAX_LOCK_WAIT_SECONDS = 90 # New: wait up to 90s for a non-stale lock to clear before skipping
|
|
30
37
|
|
|
31
38
|
logger = logging.getLogger(__name__)
|
|
@@ -43,7 +50,7 @@ class VideoImportService():
|
|
|
43
50
|
- Graceful fallback processing without lx_anonymizer
|
|
44
51
|
"""
|
|
45
52
|
|
|
46
|
-
def __init__(self, project_root: Path = None):
|
|
53
|
+
def __init__(self, project_root: Optional[Path] = None):
|
|
47
54
|
|
|
48
55
|
# Set up project root path
|
|
49
56
|
if project_root:
|
|
@@ -57,10 +64,18 @@ class VideoImportService():
|
|
|
57
64
|
self.STORAGE_DIR = STORAGE_DIR
|
|
58
65
|
|
|
59
66
|
# Central video instance and processing context
|
|
60
|
-
self.current_video = None
|
|
67
|
+
self.current_video: Optional[VideoFile] = None
|
|
61
68
|
self.processing_context: Dict[str, Any] = {}
|
|
62
69
|
|
|
70
|
+
self.delete_source = False
|
|
71
|
+
|
|
63
72
|
self.logger = logging.getLogger(__name__)
|
|
73
|
+
|
|
74
|
+
def _require_current_video(self) -> VideoFile:
|
|
75
|
+
"""Return the current VideoFile or raise if it has not been initialized."""
|
|
76
|
+
if self.current_video is None:
|
|
77
|
+
raise RuntimeError("Current video instance is not set")
|
|
78
|
+
return self.current_video
|
|
64
79
|
|
|
65
80
|
@contextmanager
|
|
66
81
|
def _file_lock(self, path: Path):
|
|
@@ -132,7 +147,7 @@ class VideoImportService():
|
|
|
132
147
|
processor_name: str,
|
|
133
148
|
save_video: bool = True,
|
|
134
149
|
delete_source: bool = True,
|
|
135
|
-
) -> "VideoFile":
|
|
150
|
+
) -> "VideoFile|None":
|
|
136
151
|
"""
|
|
137
152
|
High-level helper that orchestrates the complete video import and anonymization process.
|
|
138
153
|
Uses the central video instance pattern for improved state management.
|
|
@@ -152,6 +167,9 @@ class VideoImportService():
|
|
|
152
167
|
return None
|
|
153
168
|
raise
|
|
154
169
|
|
|
170
|
+
# Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
|
|
171
|
+
self._create_sensitive_file()
|
|
172
|
+
|
|
155
173
|
# Create or retrieve video instance
|
|
156
174
|
self._create_or_retrieve_video_instance()
|
|
157
175
|
|
|
@@ -264,66 +282,112 @@ class VideoImportService():
|
|
|
264
282
|
from endoreg_db.utils import data_paths
|
|
265
283
|
|
|
266
284
|
source_path = self.processing_context['file_path']
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
videos_dir = data_paths["video"] # /data/videos for raw files
|
|
285
|
+
|
|
286
|
+
videos_dir = data_paths["video"]
|
|
270
287
|
videos_dir.mkdir(parents=True, exist_ok=True)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
288
|
+
|
|
289
|
+
_current_video = self.current_video
|
|
290
|
+
assert _current_video is not None, "Current video instance is None during storage move"
|
|
291
|
+
|
|
292
|
+
stored_raw_path = None
|
|
293
|
+
if hasattr(_current_video, "get_raw_file_path"):
|
|
294
|
+
possible_path = _current_video.get_raw_file_path()
|
|
295
|
+
if possible_path:
|
|
296
|
+
try:
|
|
297
|
+
stored_raw_path = Path(possible_path)
|
|
298
|
+
except (TypeError, ValueError):
|
|
299
|
+
stored_raw_path = None
|
|
300
|
+
|
|
301
|
+
if stored_raw_path:
|
|
302
|
+
try:
|
|
303
|
+
storage_root = data_paths["storage"]
|
|
304
|
+
if stored_raw_path.is_absolute():
|
|
305
|
+
if not stored_raw_path.is_relative_to(storage_root):
|
|
306
|
+
stored_raw_path = None
|
|
307
|
+
else:
|
|
308
|
+
if stored_raw_path.parts and stored_raw_path.parts[0] == videos_dir.name:
|
|
309
|
+
stored_raw_path = storage_root / stored_raw_path
|
|
310
|
+
else:
|
|
311
|
+
stored_raw_path = videos_dir / stored_raw_path.name
|
|
312
|
+
except Exception:
|
|
313
|
+
stored_raw_path = None
|
|
314
|
+
|
|
315
|
+
if stored_raw_path and not stored_raw_path.suffix:
|
|
316
|
+
stored_raw_path = None
|
|
317
|
+
|
|
318
|
+
if not stored_raw_path:
|
|
319
|
+
uuid_str = getattr(_current_video, "uuid", None)
|
|
320
|
+
source_suffix = Path(source_path).suffix or ".mp4"
|
|
321
|
+
filename = f"{uuid_str}{source_suffix}" if uuid_str else Path(source_path).name
|
|
322
|
+
stored_raw_path = videos_dir / filename
|
|
323
|
+
|
|
324
|
+
delete_source = bool(self.processing_context.get('delete_source'))
|
|
325
|
+
stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
|
|
326
|
+
|
|
327
|
+
if not stored_raw_path.exists():
|
|
328
|
+
try:
|
|
329
|
+
if source_path.exists():
|
|
330
|
+
if delete_source:
|
|
331
|
+
shutil.move(str(source_path), str(stored_raw_path))
|
|
332
|
+
self.logger.info("Moved raw video to: %s", stored_raw_path)
|
|
333
|
+
else:
|
|
334
|
+
shutil.copy2(str(source_path), str(stored_raw_path))
|
|
335
|
+
self.logger.info("Copied raw video to: %s", stored_raw_path)
|
|
336
|
+
else:
|
|
337
|
+
raise FileNotFoundError(f"Neither stored raw path nor source path exists for {self.processing_context['file_path']}")
|
|
338
|
+
except Exception as e:
|
|
339
|
+
self.logger.error("Failed to place video in final storage: %s", e)
|
|
340
|
+
raise
|
|
341
|
+
else:
|
|
342
|
+
# If we already have the stored copy, respect delete_source flag without touching assets unnecessarily
|
|
343
|
+
if delete_source and source_path.exists():
|
|
344
|
+
try:
|
|
345
|
+
os.remove(source_path)
|
|
346
|
+
self.logger.info("Removed original source file after storing copy: %s", source_path)
|
|
347
|
+
except OSError as e:
|
|
348
|
+
self.logger.warning("Failed to remove source file %s: %s", source_path, e)
|
|
349
|
+
|
|
350
|
+
# Ensure database path points to stored location (relative to storage root)
|
|
286
351
|
try:
|
|
287
352
|
storage_root = data_paths["storage"]
|
|
288
|
-
relative_path =
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
353
|
+
relative_path = Path(stored_raw_path).relative_to(storage_root)
|
|
354
|
+
if _current_video.raw_file.name != str(relative_path):
|
|
355
|
+
_current_video.raw_file.name = str(relative_path)
|
|
356
|
+
_current_video.save(update_fields=['raw_file'])
|
|
357
|
+
self.logger.info("Updated raw_file path to: %s", relative_path)
|
|
292
358
|
except Exception as e:
|
|
293
|
-
self.logger.error("Failed to
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
359
|
+
self.logger.error("Failed to ensure raw_file path is relative: %s", e)
|
|
360
|
+
fallback_relative = Path("videos") / Path(stored_raw_path).name
|
|
361
|
+
if _current_video.raw_file.name != fallback_relative.as_posix():
|
|
362
|
+
_current_video.raw_file.name = fallback_relative.as_posix()
|
|
363
|
+
_current_video.save(update_fields=['raw_file'])
|
|
364
|
+
self.logger.info("Updated raw_file path using fallback: %s", fallback_relative.as_posix())
|
|
365
|
+
|
|
300
366
|
# Store paths for later processing
|
|
301
|
-
self.processing_context['raw_video_path'] =
|
|
302
|
-
self.processing_context['video_filename'] =
|
|
367
|
+
self.processing_context['raw_video_path'] = Path(stored_raw_path)
|
|
368
|
+
self.processing_context['video_filename'] = Path(stored_raw_path).name
|
|
303
369
|
|
|
304
370
|
def _setup_processing_environment(self):
|
|
305
371
|
"""Setup the processing environment without file movement."""
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
raise RuntimeError("No video instance available for processing environment setup")
|
|
309
|
-
|
|
372
|
+
video = self._require_current_video()
|
|
373
|
+
|
|
310
374
|
# Initialize video specifications
|
|
311
|
-
|
|
312
|
-
|
|
375
|
+
video.initialize_video_specs()
|
|
376
|
+
|
|
313
377
|
# Initialize frame objects in database
|
|
314
|
-
|
|
378
|
+
video.initialize_frames()
|
|
315
379
|
|
|
316
380
|
# Extract frames BEFORE processing to prevent pipeline 1 conflicts
|
|
317
381
|
self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
|
|
318
382
|
try:
|
|
319
|
-
frames_extracted =
|
|
383
|
+
frames_extracted = video.extract_frames(overwrite=False)
|
|
320
384
|
if frames_extracted:
|
|
321
385
|
self.processing_context['frames_extracted'] = True
|
|
322
386
|
self.logger.info("Frame extraction completed successfully")
|
|
323
387
|
|
|
324
388
|
# CRITICAL: Immediately save the frames_extracted state to database
|
|
325
389
|
# to prevent refresh_from_db() in pipeline 1 from overriding it
|
|
326
|
-
state =
|
|
390
|
+
state = video.get_or_create_state()
|
|
327
391
|
if not state.frames_extracted:
|
|
328
392
|
state.frames_extracted = True
|
|
329
393
|
state.save(update_fields=['frames_extracted'])
|
|
@@ -336,7 +400,7 @@ class VideoImportService():
|
|
|
336
400
|
self.processing_context['frames_extracted'] = False
|
|
337
401
|
|
|
338
402
|
# Ensure default patient data
|
|
339
|
-
self._ensure_default_patient_data()
|
|
403
|
+
self._ensure_default_patient_data(video_instance=video)
|
|
340
404
|
|
|
341
405
|
self.logger.info("Processing environment setup completed")
|
|
342
406
|
|
|
@@ -344,8 +408,12 @@ class VideoImportService():
|
|
|
344
408
|
"""Process frames and extract metadata with anonymization."""
|
|
345
409
|
# Check frame cleaning availability
|
|
346
410
|
frame_cleaning_available, FrameCleaner, ReportReader = self._ensure_frame_cleaning_available()
|
|
347
|
-
|
|
348
|
-
|
|
411
|
+
video = self._require_current_video()
|
|
412
|
+
|
|
413
|
+
raw_file_field = video.raw_file
|
|
414
|
+
has_raw_file = isinstance(raw_file_field, FieldFile) and bool(raw_file_field.name)
|
|
415
|
+
|
|
416
|
+
if not (frame_cleaning_available and has_raw_file):
|
|
349
417
|
self.logger.warning("Frame cleaning not available or conditions not met, using fallback anonymization.")
|
|
350
418
|
self._fallback_anonymize_video()
|
|
351
419
|
return
|
|
@@ -372,14 +440,20 @@ class VideoImportService():
|
|
|
372
440
|
raw_video_path = self.processing_context.get('raw_video_path')
|
|
373
441
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
|
|
374
442
|
grace_seconds = 60
|
|
375
|
-
|
|
443
|
+
expected_cleaned_path: Optional[Path] = None
|
|
444
|
+
processed_field = video.processed_file
|
|
445
|
+
if isinstance(processed_field, FieldFile) and processed_field.name:
|
|
446
|
+
try:
|
|
447
|
+
expected_cleaned_path = Path(processed_field.path)
|
|
448
|
+
except (NotImplementedError, TypeError, ValueError):
|
|
449
|
+
expected_cleaned_path = None
|
|
376
450
|
found = False
|
|
377
|
-
if
|
|
451
|
+
if expected_cleaned_path is not None:
|
|
378
452
|
for _ in range(grace_seconds):
|
|
379
|
-
if
|
|
380
|
-
self.processing_context['cleaned_video_path'] =
|
|
453
|
+
if expected_cleaned_path.exists():
|
|
454
|
+
self.processing_context['cleaned_video_path'] = expected_cleaned_path
|
|
381
455
|
self.processing_context['anonymization_completed'] = True
|
|
382
|
-
self.logger.info("Detected cleaned video during grace period: %s",
|
|
456
|
+
self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
|
|
383
457
|
found = True
|
|
384
458
|
break
|
|
385
459
|
time.sleep(1)
|
|
@@ -387,7 +461,7 @@ class VideoImportService():
|
|
|
387
461
|
self._fallback_anonymize_video()
|
|
388
462
|
if not found:
|
|
389
463
|
raise TimeoutError("Frame cleaning operation timed out - likely Ollama connection issue")
|
|
390
|
-
|
|
464
|
+
|
|
391
465
|
except Exception as e:
|
|
392
466
|
self.logger.warning("Frame cleaning failed (reason: %s), falling back to simple copy", e)
|
|
393
467
|
# Try fallback anonymization when frame cleaning fails
|
|
@@ -398,76 +472,94 @@ class VideoImportService():
|
|
|
398
472
|
# If even fallback fails, mark as not anonymized but continue import
|
|
399
473
|
self.processing_context['anonymization_completed'] = False
|
|
400
474
|
self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
|
|
401
|
-
|
|
475
|
+
|
|
476
|
+
def _save_anonymized_video(self):
|
|
477
|
+
video = self._require_current_video()
|
|
478
|
+
anonymized_video_path = video.get_target_anonymized_video_path()
|
|
479
|
+
|
|
480
|
+
if not anonymized_video_path.exists():
|
|
481
|
+
raise RuntimeError(f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}")
|
|
482
|
+
|
|
483
|
+
new_processed_hash = get_video_hash(anonymized_video_path)
|
|
484
|
+
if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
|
|
485
|
+
raise ValueError(
|
|
486
|
+
f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
video.processed_video_hash = new_processed_hash
|
|
490
|
+
video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
|
|
491
|
+
|
|
492
|
+
update_fields = [
|
|
493
|
+
"processed_video_hash",
|
|
494
|
+
"processed_file",
|
|
495
|
+
"frame_dir",
|
|
496
|
+
]
|
|
497
|
+
|
|
498
|
+
if self.delete_source:
|
|
499
|
+
original_raw_file_path_to_delete = video.get_raw_file_path()
|
|
500
|
+
original_raw_frame_dir_to_delete = video.get_frame_dir_path()
|
|
501
|
+
|
|
502
|
+
video.raw_file.name = None # type: ignore[assignment]
|
|
503
|
+
|
|
504
|
+
update_fields.extend(["raw_file", "video_hash"])
|
|
505
|
+
|
|
506
|
+
transaction.on_commit(lambda: _cleanup_raw_assets(
|
|
507
|
+
video_uuid=video.uuid,
|
|
508
|
+
raw_file_path=original_raw_file_path_to_delete,
|
|
509
|
+
raw_frame_dir=original_raw_frame_dir_to_delete
|
|
510
|
+
))
|
|
511
|
+
|
|
512
|
+
video.save(update_fields=update_fields)
|
|
513
|
+
video.state.mark_anonymized(save=True)
|
|
514
|
+
video.refresh_from_db()
|
|
515
|
+
self.current_video = video
|
|
516
|
+
return True
|
|
402
517
|
|
|
403
518
|
def _fallback_anonymize_video(self):
|
|
404
519
|
"""
|
|
405
520
|
Fallback to create anonymized video if lx_anonymizer is not available.
|
|
406
|
-
|
|
407
|
-
This method tries multiple fallback strategies:
|
|
408
|
-
1. Use VideoFile.anonymize_video() method if available
|
|
409
|
-
2. Simple copy of raw video to anonym_videos (no processing)
|
|
410
|
-
|
|
411
|
-
The processed video will be marked in processing_context for _cleanup_and_archive().
|
|
412
521
|
"""
|
|
413
522
|
try:
|
|
414
523
|
self.logger.info("Attempting fallback video anonymization...")
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
524
|
+
video = self.current_video
|
|
525
|
+
if video is None:
|
|
526
|
+
self.logger.warning("No VideoFile instance available for fallback anonymization")
|
|
527
|
+
else:
|
|
528
|
+
# Try VideoFile.pipe_2() method if available
|
|
529
|
+
if hasattr(video, 'pipe_2'):
|
|
530
|
+
self.logger.info("Trying VideoFile.pipe_2() method...")
|
|
531
|
+
if video.pipe_2():
|
|
532
|
+
self.logger.info("VideoFile.pipe_2() succeeded")
|
|
533
|
+
self.processing_context['anonymization_completed'] = True
|
|
534
|
+
return
|
|
535
|
+
self.logger.warning("VideoFile.pipe_2() returned False")
|
|
536
|
+
# Try direct anonymization via _anonymize
|
|
537
|
+
if _anonymize(video, delete_original_raw=self.delete_source):
|
|
538
|
+
self.logger.info("VideoFile._anonymize() succeeded")
|
|
423
539
|
self.processing_context['anonymization_completed'] = True
|
|
424
540
|
return
|
|
425
|
-
else:
|
|
426
|
-
self.logger.warning("VideoFile.pipe_2() returned False, trying simple copy fallback")
|
|
427
|
-
else:
|
|
428
|
-
self.logger.warning("VideoFile.pipe_2() method not available")
|
|
429
541
|
|
|
430
542
|
# Strategy 2: Simple copy (no processing, just copy raw to processed)
|
|
431
543
|
self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
|
|
432
|
-
|
|
433
|
-
# The _cleanup_and_archive() method will handle the copy
|
|
434
|
-
# We just need to mark that no real anonymization happened
|
|
435
544
|
self.processing_context['anonymization_completed'] = False
|
|
436
|
-
self.processing_context['use_raw_as_processed'] = True
|
|
437
|
-
|
|
545
|
+
self.processing_context['use_raw_as_processed'] = True
|
|
438
546
|
self.logger.warning("Fallback: Video will be imported without anonymization (raw copy used)")
|
|
439
|
-
|
|
440
547
|
except Exception as e:
|
|
441
548
|
self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
|
|
442
549
|
self.processing_context['anonymization_completed'] = False
|
|
443
|
-
self.processing_context['error_reason'] =
|
|
444
|
-
|
|
550
|
+
self.processing_context['error_reason'] = str(e)
|
|
445
551
|
def _finalize_processing(self):
|
|
446
552
|
"""Finalize processing and update video state."""
|
|
447
553
|
self.logger.info("Updating video processing state...")
|
|
448
554
|
|
|
449
555
|
with transaction.atomic():
|
|
450
|
-
|
|
451
|
-
|
|
556
|
+
video = self._require_current_video()
|
|
557
|
+
try:
|
|
558
|
+
video.refresh_from_db()
|
|
559
|
+
except Exception as refresh_error:
|
|
560
|
+
self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
|
|
452
561
|
|
|
453
|
-
|
|
454
|
-
try:
|
|
455
|
-
self.current_video.refresh_from_db()
|
|
456
|
-
except Exception as e:
|
|
457
|
-
self.logger.error(f"Failed to refresh current_video from DB: {e}")
|
|
458
|
-
if not self.current_video:
|
|
459
|
-
raise RuntimeError("No current video instance available for finalization")
|
|
460
|
-
|
|
461
|
-
if not self.current_video.processed_file:
|
|
462
|
-
self.logger.warning("No processed file available for current video")
|
|
463
|
-
self.current_video.processed_file = None # Ensure field is not None
|
|
464
|
-
self.current_video.mark_sensitive_meta_processed = False
|
|
465
|
-
else:
|
|
466
|
-
self.current_video.mark_sensitive_meta_processed = True
|
|
467
|
-
|
|
468
|
-
state = self.current_video.get_or_create_state()
|
|
469
|
-
if not state:
|
|
470
|
-
raise RuntimeError("Failed to get or create video state")
|
|
562
|
+
state = video.get_or_create_state()
|
|
471
563
|
|
|
472
564
|
# Only mark frames as extracted if they were successfully extracted
|
|
473
565
|
if self.processing_context.get('frames_extracted', False):
|
|
@@ -496,10 +588,7 @@ class VideoImportService():
|
|
|
496
588
|
|
|
497
589
|
# Save all state changes
|
|
498
590
|
state.save()
|
|
499
|
-
self.logger.info("Video processing state updated")
|
|
500
|
-
# Save all state changes
|
|
501
|
-
self.current_video.state.save()
|
|
502
|
-
self.current_video.save()
|
|
591
|
+
self.logger.info("Video processing state updated")
|
|
503
592
|
|
|
504
593
|
# Signal completion
|
|
505
594
|
self._signal_completion()
|
|
@@ -507,59 +596,48 @@ class VideoImportService():
|
|
|
507
596
|
def _cleanup_and_archive(self):
|
|
508
597
|
"""Move processed video to anonym_videos and cleanup."""
|
|
509
598
|
from endoreg_db.utils import data_paths
|
|
510
|
-
|
|
511
|
-
# Define target directory for processed videos
|
|
599
|
+
|
|
512
600
|
anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
|
|
513
601
|
anonym_videos_dir.mkdir(parents=True, exist_ok=True)
|
|
514
|
-
|
|
515
|
-
|
|
602
|
+
|
|
603
|
+
video = self._require_current_video()
|
|
604
|
+
|
|
516
605
|
processed_video_path = None
|
|
517
|
-
|
|
518
|
-
# Look for cleaned video from frame cleaning process
|
|
519
606
|
if 'cleaned_video_path' in self.processing_context:
|
|
520
607
|
processed_video_path = self.processing_context['cleaned_video_path']
|
|
521
608
|
else:
|
|
522
|
-
# If no processing occurred, copy from raw video location
|
|
523
609
|
raw_video_path = self.processing_context.get('raw_video_path')
|
|
524
610
|
if raw_video_path and Path(raw_video_path).exists():
|
|
525
611
|
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
526
612
|
processed_filename = f"processed_{video_filename}"
|
|
527
613
|
processed_video_path = Path(raw_video_path).parent / processed_filename
|
|
528
|
-
|
|
529
|
-
# Copy raw to processed location (will be moved to anonym_videos)
|
|
530
614
|
try:
|
|
531
615
|
shutil.copy2(str(raw_video_path), str(processed_video_path))
|
|
532
616
|
self.logger.info("Copied raw video for processing: %s", processed_video_path)
|
|
533
|
-
except Exception as
|
|
534
|
-
self.logger.error("Failed to copy raw video: %s",
|
|
535
|
-
processed_video_path = None
|
|
536
|
-
|
|
537
|
-
# Move processed video to anonym_videos ONLY if it exists
|
|
617
|
+
except Exception as exc:
|
|
618
|
+
self.logger.error("Failed to copy raw video: %s", exc)
|
|
619
|
+
processed_video_path = None
|
|
620
|
+
|
|
538
621
|
if processed_video_path and Path(processed_video_path).exists():
|
|
539
622
|
try:
|
|
540
|
-
# ✅ Clean filename: no original filename leakage
|
|
541
623
|
ext = Path(processed_video_path).suffix or ".mp4"
|
|
542
|
-
anonym_video_filename = f"anonym_{
|
|
624
|
+
anonym_video_filename = f"anonym_{video.uuid}{ext}"
|
|
543
625
|
anonym_target_path = anonym_videos_dir / anonym_video_filename
|
|
544
626
|
|
|
545
|
-
# Move processed video to anonym_videos/
|
|
546
627
|
shutil.move(str(processed_video_path), str(anonym_target_path))
|
|
547
628
|
self.logger.info("Moved processed video to: %s", anonym_target_path)
|
|
548
629
|
|
|
549
|
-
# Verify the file actually exists before updating database
|
|
550
630
|
if anonym_target_path.exists():
|
|
551
631
|
try:
|
|
552
632
|
storage_root = data_paths["storage"]
|
|
553
633
|
relative_path = anonym_target_path.relative_to(storage_root)
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
self.current_video.save(update_fields=["processed_file"])
|
|
634
|
+
video.processed_file.name = str(relative_path)
|
|
635
|
+
video.save(update_fields=["processed_file"])
|
|
557
636
|
self.logger.info("Updated processed_file path to: %s", relative_path)
|
|
558
|
-
except Exception as
|
|
559
|
-
self.logger.error("Failed to update processed_file path: %s",
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
self.current_video.save(update_fields=['processed_file'])
|
|
637
|
+
except Exception as exc:
|
|
638
|
+
self.logger.error("Failed to update processed_file path: %s", exc)
|
|
639
|
+
video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
|
|
640
|
+
video.save(update_fields=['processed_file'])
|
|
563
641
|
self.logger.info(
|
|
564
642
|
"Updated processed_file path using fallback: %s",
|
|
565
643
|
f"anonym_videos/{anonym_video_filename}",
|
|
@@ -568,264 +646,194 @@ class VideoImportService():
|
|
|
568
646
|
self.processing_context['anonymization_completed'] = True
|
|
569
647
|
else:
|
|
570
648
|
self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
|
|
571
|
-
except Exception as
|
|
572
|
-
self.logger.error("Failed to move processed video to anonym_videos: %s",
|
|
649
|
+
except Exception as exc:
|
|
650
|
+
self.logger.error("Failed to move processed video to anonym_videos: %s", exc)
|
|
573
651
|
else:
|
|
574
652
|
self.logger.warning("No processed video available - processed_file will remain empty")
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
# Cleanup temporary directories
|
|
653
|
+
|
|
578
654
|
try:
|
|
579
655
|
from endoreg_db.utils.paths import RAW_FRAME_DIR
|
|
580
656
|
shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
|
|
581
657
|
self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
|
|
582
|
-
except Exception as
|
|
583
|
-
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR,
|
|
584
|
-
|
|
585
|
-
# Handle source file deletion - this should already be moved, but check raw_videos
|
|
658
|
+
except Exception as exc:
|
|
659
|
+
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
|
|
660
|
+
|
|
586
661
|
source_path = self.processing_context['file_path']
|
|
587
662
|
if self.processing_context['delete_source'] and Path(source_path).exists():
|
|
588
663
|
try:
|
|
589
664
|
os.remove(source_path)
|
|
590
665
|
self.logger.info("Removed remaining source file: %s", source_path)
|
|
666
|
+
except Exception as exc:
|
|
667
|
+
self.logger.warning("Failed to remove source file %s: %s", source_path, exc)
|
|
668
|
+
|
|
669
|
+
if not video.processed_file or not Path(video.processed_file.path).exists():
|
|
670
|
+
self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
|
|
671
|
+
try:
|
|
672
|
+
video.anonymize(delete_original_raw=self.delete_source)
|
|
673
|
+
video.save(update_fields=['processed_file'])
|
|
674
|
+
self.logger.info("Late-stage anonymization succeeded")
|
|
591
675
|
except Exception as e:
|
|
592
|
-
self.logger.
|
|
593
|
-
|
|
594
|
-
|
|
676
|
+
self.logger.error("Late-stage anonymization failed: %s", e)
|
|
677
|
+
self.processing_context['anonymization_completed'] = False
|
|
678
|
+
|
|
679
|
+
self.logger.info("Cleanup and archiving completed")
|
|
680
|
+
|
|
595
681
|
self.processed_files.add(str(self.processing_context['file_path']))
|
|
596
|
-
|
|
597
|
-
# Refresh from database and finalize state
|
|
682
|
+
|
|
598
683
|
with transaction.atomic():
|
|
599
|
-
|
|
600
|
-
if hasattr(
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
self.logger.info("Import and anonymization completed for VideoFile UUID: %s",
|
|
684
|
+
video.refresh_from_db()
|
|
685
|
+
if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
|
|
686
|
+
video.state.mark_sensitive_meta_processed(save=True)
|
|
687
|
+
|
|
688
|
+
self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
|
|
604
689
|
self.logger.info("Raw video stored in: /data/videos")
|
|
605
690
|
self.logger.info("Processed video stored in: /data/anonym_videos")
|
|
606
691
|
|
|
607
|
-
def _create_sensitive_file(
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
692
|
+
def _create_sensitive_file(
|
|
693
|
+
self,
|
|
694
|
+
video_instance: VideoFile | None = None,
|
|
695
|
+
file_path: Path | str | None = None,
|
|
696
|
+
) -> Path:
|
|
697
|
+
"""Create or move a sensitive copy of the raw video file inside storage."""
|
|
611
698
|
|
|
612
|
-
|
|
613
|
-
video_instance: Optional video instance, defaults to self.current_video
|
|
614
|
-
file_path: Optional file path, defaults to processing_context['file_path']
|
|
699
|
+
video = video_instance or self._require_current_video()
|
|
615
700
|
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
"""
|
|
619
|
-
video_file = video_instance or self.current_video
|
|
620
|
-
# Always use the currently stored raw file path from the model to avoid deleting external source assets
|
|
621
|
-
source_path = None
|
|
701
|
+
raw_field: FieldFile | None = getattr(video, "raw_file", None)
|
|
702
|
+
source_path: Path | None = None
|
|
622
703
|
try:
|
|
623
|
-
if
|
|
624
|
-
source_path = Path(
|
|
704
|
+
if raw_field and raw_field.path:
|
|
705
|
+
source_path = Path(raw_field.path)
|
|
625
706
|
except Exception:
|
|
626
707
|
source_path = None
|
|
627
|
-
|
|
708
|
+
|
|
628
709
|
if source_path is None and file_path is not None:
|
|
629
710
|
source_path = Path(file_path)
|
|
630
|
-
|
|
631
|
-
if
|
|
632
|
-
raise ValueError("No video instance available for creating sensitive file")
|
|
633
|
-
if not source_path:
|
|
711
|
+
|
|
712
|
+
if source_path is None:
|
|
634
713
|
raise ValueError("No file path available for creating sensitive file")
|
|
635
|
-
|
|
636
|
-
if not video_file.raw_file:
|
|
714
|
+
if not raw_field:
|
|
637
715
|
raise ValueError("VideoFile must have a raw_file to create a sensitive file")
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
target_dir = VIDEO_DIR / 'sensitive'
|
|
716
|
+
|
|
717
|
+
target_dir = VIDEO_DIR / "sensitive"
|
|
641
718
|
if not target_dir.exists():
|
|
642
|
-
self.logger.info(
|
|
719
|
+
self.logger.info("Creating sensitive file directory: %s", target_dir)
|
|
643
720
|
os.makedirs(target_dir, exist_ok=True)
|
|
644
|
-
|
|
645
|
-
# Move the stored raw file into the sensitive directory within storage
|
|
721
|
+
|
|
646
722
|
target_file_path = target_dir / source_path.name
|
|
647
723
|
try:
|
|
648
|
-
# Prefer a move within the storage to avoid extra disk usage. This does not touch external input files.
|
|
649
724
|
shutil.move(str(source_path), str(target_file_path))
|
|
650
|
-
self.logger.info(
|
|
651
|
-
except Exception as
|
|
652
|
-
|
|
653
|
-
self.logger.warning(f"Failed to move raw file to sensitive dir, copying instead: {e}")
|
|
725
|
+
self.logger.info("Moved raw file to sensitive directory: %s", target_file_path)
|
|
726
|
+
except Exception as exc:
|
|
727
|
+
self.logger.warning("Failed to move raw file to sensitive dir, copying instead: %s", exc)
|
|
654
728
|
shutil.copy(str(source_path), str(target_file_path))
|
|
655
729
|
try:
|
|
656
|
-
# Remove only the stored raw file copy; never touch external input paths here
|
|
657
730
|
os.remove(source_path)
|
|
658
731
|
except FileNotFoundError:
|
|
659
732
|
pass
|
|
660
|
-
|
|
661
|
-
# Update the model to point to the sensitive file location
|
|
662
|
-
# Use relative path from storage root, like in create_from_file.py
|
|
733
|
+
|
|
663
734
|
try:
|
|
664
735
|
from endoreg_db.utils import data_paths
|
|
736
|
+
|
|
665
737
|
storage_root = data_paths["storage"]
|
|
666
738
|
relative_path = target_file_path.relative_to(storage_root)
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
self.logger.info(
|
|
670
|
-
except Exception as
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
# Source file cleanup for external inputs is handled by create_from_file via delete_source flag.
|
|
679
|
-
|
|
680
|
-
self.logger.info(f"Created sensitive file for {video_file.uuid} at {target_file_path}")
|
|
681
|
-
return target_file_path
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
739
|
+
video.raw_file.name = str(relative_path)
|
|
740
|
+
video.save(update_fields=["raw_file"])
|
|
741
|
+
self.logger.info("Updated video.raw_file to point to sensitive location: %s", relative_path)
|
|
742
|
+
except Exception as exc:
|
|
743
|
+
self.logger.warning("Failed to set relative path, using fallback: %s", exc)
|
|
744
|
+
video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
|
|
745
|
+
video.save(update_fields=["raw_file"])
|
|
746
|
+
self.logger.info(
|
|
747
|
+
"Updated video.raw_file using fallback method: videos/sensitive/%s",
|
|
748
|
+
target_file_path.name,
|
|
749
|
+
)
|
|
685
750
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
Ensure frame cleaning modules are available by adding lx-anonymizer to path.
|
|
689
|
-
|
|
690
|
-
Returns:
|
|
691
|
-
Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
|
|
692
|
-
"""
|
|
693
|
-
try:
|
|
694
|
-
# Check if we can find the lx-anonymizer directory
|
|
695
|
-
from importlib import resources
|
|
696
|
-
lx_anonymizer_path = resources.files("lx_anonymizer")
|
|
697
|
-
|
|
698
|
-
if lx_anonymizer_path.exists():
|
|
699
|
-
# Add to Python path temporarily
|
|
700
|
-
if str(lx_anonymizer_path) not in sys.path:
|
|
701
|
-
sys.path.insert(0, str(lx_anonymizer_path))
|
|
702
|
-
|
|
703
|
-
# Try simple import
|
|
704
|
-
from lx_anonymizer import FrameCleaner, ReportReader
|
|
705
|
-
|
|
706
|
-
self.logger.info("Successfully imported lx_anonymizer modules")
|
|
707
|
-
|
|
708
|
-
# Remove from path to avoid conflicts
|
|
709
|
-
if str(lx_anonymizer_path) in sys.path:
|
|
710
|
-
sys.path.remove(str(lx_anonymizer_path))
|
|
711
|
-
|
|
712
|
-
return True, FrameCleaner, ReportReader
|
|
713
|
-
|
|
714
|
-
else:
|
|
715
|
-
self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
|
|
716
|
-
|
|
717
|
-
except Exception as e:
|
|
718
|
-
self.logger.warning(f"Frame cleaning not available: {e}")
|
|
719
|
-
|
|
720
|
-
return False, None, None
|
|
751
|
+
self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
|
|
752
|
+
return target_file_path
|
|
721
753
|
|
|
722
754
|
def _get_processor_roi_info(self):
|
|
723
755
|
"""Get processor ROI information for masking."""
|
|
724
756
|
processor_roi = None
|
|
725
757
|
endoscope_roi = None
|
|
726
|
-
|
|
758
|
+
|
|
759
|
+
video = self._require_current_video()
|
|
760
|
+
|
|
727
761
|
try:
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
762
|
+
video_meta = getattr(video, "video_meta", None)
|
|
763
|
+
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
764
|
+
if processor:
|
|
765
|
+
assert isinstance(processor, EndoscopyProcessor), "Processor is not of type EndoscopyProcessor"
|
|
732
766
|
endoscope_roi = processor.get_roi_endoscope_image()
|
|
733
|
-
|
|
734
|
-
# Get all processor ROIs for comprehensive masking
|
|
735
767
|
processor_roi = {
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
768
|
+
"endoscope_image": endoscope_roi,
|
|
769
|
+
"patient_first_name": processor.get_roi_patient_first_name(),
|
|
770
|
+
"patient_last_name": processor.get_roi_patient_last_name(),
|
|
771
|
+
"patient_dob": processor.get_roi_patient_dob(),
|
|
772
|
+
"examination_date": processor.get_roi_examination_date(),
|
|
773
|
+
"examination_time": processor.get_roi_examination_time(),
|
|
774
|
+
"endoscope_type": processor.get_roi_endoscope_type(),
|
|
775
|
+
"endoscopy_sn": processor.get_roi_endoscopy_sn(),
|
|
744
776
|
}
|
|
745
|
-
|
|
746
|
-
self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
|
|
777
|
+
self.logger.info("Retrieved processor ROI information: endoscope_roi=%s", endoscope_roi)
|
|
747
778
|
else:
|
|
748
|
-
self.logger.warning(
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
779
|
+
self.logger.warning(
|
|
780
|
+
"No processor found for video %s, proceeding without ROI masking",
|
|
781
|
+
video.uuid,
|
|
782
|
+
)
|
|
783
|
+
except Exception as exc:
|
|
784
|
+
self.logger.error("Failed to retrieve processor ROI information: %s", exc)
|
|
785
|
+
|
|
754
786
|
return processor_roi, endoscope_roi
|
|
755
787
|
|
|
788
|
+
def _ensure_default_patient_data(self, video_instance: VideoFile | None = None) -> None:
|
|
789
|
+
"""Ensure minimum patient data is present on the video's SensitiveMeta."""
|
|
756
790
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
Args:
|
|
764
|
-
video_instance: Optional video instance, defaults to self.current_video
|
|
765
|
-
"""
|
|
766
|
-
video_file = video_instance or self.current_video
|
|
767
|
-
|
|
768
|
-
if not video_file:
|
|
769
|
-
raise ValueError("No video instance available for ensuring patient data")
|
|
770
|
-
|
|
771
|
-
if not video_file.sensitive_meta:
|
|
772
|
-
self.logger.info(f"No SensitiveMeta found for video {video_file.uuid}, creating default")
|
|
773
|
-
|
|
774
|
-
# Create default SensitiveMeta with placeholder data
|
|
791
|
+
video = video_instance or self._require_current_video()
|
|
792
|
+
|
|
793
|
+
sensitive_meta = getattr(video, "sensitive_meta", None)
|
|
794
|
+
if not sensitive_meta:
|
|
795
|
+
self.logger.info("No SensitiveMeta found for video %s, creating default", video.uuid)
|
|
775
796
|
default_data = {
|
|
776
797
|
"patient_first_name": "Patient",
|
|
777
|
-
"patient_last_name": "Unknown",
|
|
778
|
-
"patient_dob": date(1990, 1, 1),
|
|
798
|
+
"patient_last_name": "Unknown",
|
|
799
|
+
"patient_dob": date(1990, 1, 1),
|
|
779
800
|
"examination_date": date.today(),
|
|
780
|
-
"center_name":
|
|
801
|
+
"center_name": video.center.name if video.center else "university_hospital_wuerzburg",
|
|
781
802
|
}
|
|
782
|
-
|
|
783
803
|
try:
|
|
784
804
|
sensitive_meta = SensitiveMeta.create_from_dict(default_data)
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
# Mark sensitive meta as processed after creating default data
|
|
789
|
-
state = video_file.get_or_create_state()
|
|
805
|
+
video.sensitive_meta = sensitive_meta
|
|
806
|
+
video.save(update_fields=["sensitive_meta"])
|
|
807
|
+
state = video.get_or_create_state()
|
|
790
808
|
state.mark_sensitive_meta_processed(save=True)
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
self.logger.error(f"Failed to create default SensitiveMeta for video {video_file.uuid}: {e}")
|
|
809
|
+
self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
|
|
810
|
+
except Exception as exc:
|
|
811
|
+
self.logger.error("Failed to create default SensitiveMeta for video %s: %s", video.uuid, exc)
|
|
795
812
|
return
|
|
796
|
-
|
|
797
813
|
else:
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
update_data = {}
|
|
801
|
-
|
|
802
|
-
if not video_file.sensitive_meta.patient_first_name:
|
|
814
|
+
update_data: Dict[str, Any] = {}
|
|
815
|
+
if not sensitive_meta.patient_first_name:
|
|
803
816
|
update_data["patient_first_name"] = "Patient"
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
if not video_file.sensitive_meta.patient_last_name:
|
|
817
|
+
if not sensitive_meta.patient_last_name:
|
|
807
818
|
update_data["patient_last_name"] = "Unknown"
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
if not video_file.sensitive_meta.patient_dob:
|
|
819
|
+
if not sensitive_meta.patient_dob:
|
|
811
820
|
update_data["patient_dob"] = date(1990, 1, 1)
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
if not video_file.sensitive_meta.examination_date:
|
|
821
|
+
if not sensitive_meta.examination_date:
|
|
815
822
|
update_data["examination_date"] = date.today()
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
if update_needed:
|
|
823
|
+
|
|
824
|
+
if update_data:
|
|
819
825
|
try:
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
# Mark sensitive meta as processed after updating missing fields
|
|
823
|
-
state = video_file.get_or_create_state()
|
|
826
|
+
sensitive_meta.update_from_dict(update_data)
|
|
827
|
+
state = video.get_or_create_state()
|
|
824
828
|
state.mark_sensitive_meta_processed(save=True)
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
+
self.logger.info(
|
|
830
|
+
"Updated missing SensitiveMeta fields for video %s: %s",
|
|
831
|
+
video.uuid,
|
|
832
|
+
list(update_data.keys()),
|
|
833
|
+
)
|
|
834
|
+
except Exception as exc:
|
|
835
|
+
self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
|
|
836
|
+
|
|
829
837
|
|
|
830
838
|
|
|
831
839
|
def _ensure_frame_cleaning_available(self):
|
|
@@ -839,6 +847,9 @@ class VideoImportService():
|
|
|
839
847
|
# Check if we can find the lx-anonymizer directory
|
|
840
848
|
from importlib import resources
|
|
841
849
|
lx_anonymizer_path = resources.files("lx_anonymizer")
|
|
850
|
+
|
|
851
|
+
# make sure lx_anonymizer_path is a Path object
|
|
852
|
+
lx_anonymizer_path = Path(str(lx_anonymizer_path))
|
|
842
853
|
|
|
843
854
|
if lx_anonymizer_path.exists():
|
|
844
855
|
# Add to Python path temporarily
|
|
@@ -864,39 +875,7 @@ class VideoImportService():
|
|
|
864
875
|
|
|
865
876
|
return False, None, None
|
|
866
877
|
|
|
867
|
-
|
|
868
|
-
"""Get processor ROI information for masking."""
|
|
869
|
-
processor_roi = None
|
|
870
|
-
endoscope_roi = None
|
|
871
|
-
|
|
872
|
-
try:
|
|
873
|
-
if self.current_video.video_meta and self.current_video.video_meta.processor:
|
|
874
|
-
processor = getattr(self.current_video.video_meta, "processor", None)
|
|
875
|
-
|
|
876
|
-
# Get the endoscope ROI for masking
|
|
877
|
-
endoscope_roi = processor.get_roi_endoscope_image()
|
|
878
|
-
|
|
879
|
-
# Get all processor ROIs for comprehensive masking
|
|
880
|
-
processor_roi = {
|
|
881
|
-
'endoscope_image': endoscope_roi,
|
|
882
|
-
'patient_first_name': processor.get_roi_patient_first_name(),
|
|
883
|
-
'patient_last_name': processor.get_roi_patient_last_name(),
|
|
884
|
-
'patient_dob': processor.get_roi_patient_dob(),
|
|
885
|
-
'examination_date': processor.get_roi_examination_date(),
|
|
886
|
-
'examination_time': processor.get_roi_examination_time(),
|
|
887
|
-
'endoscope_type': processor.get_roi_endoscope_type(),
|
|
888
|
-
'endoscopy_sn': processor.get_roi_endoscopy_sn(),
|
|
889
|
-
}
|
|
890
|
-
|
|
891
|
-
self.logger.info(f"Retrieved processor ROI information: endoscope_roi={endoscope_roi}")
|
|
892
|
-
else:
|
|
893
|
-
self.logger.warning(f"No processor found for video {self.current_video.uuid}, proceeding without ROI masking")
|
|
894
|
-
|
|
895
|
-
except Exception as e:
|
|
896
|
-
self.logger.error(f"Failed to retrieve processor ROI information: {e}")
|
|
897
|
-
# Continue without ROI - don't fail the entire import process
|
|
898
|
-
|
|
899
|
-
return processor_roi, endoscope_roi
|
|
878
|
+
|
|
900
879
|
|
|
901
880
|
def _perform_frame_cleaning(self, FrameCleaner, processor_roi, endoscope_roi):
|
|
902
881
|
"""Perform frame cleaning and anonymization."""
|
|
@@ -910,7 +889,9 @@ class VideoImportService():
|
|
|
910
889
|
raise RuntimeError(f"Raw video path not found: {raw_video_path}")
|
|
911
890
|
|
|
912
891
|
# Get processor name safely
|
|
913
|
-
|
|
892
|
+
video = self._require_current_video()
|
|
893
|
+
video_meta = getattr(video, "video_meta", None)
|
|
894
|
+
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
914
895
|
device_name = processor.name if processor else self.processing_context['processor_name']
|
|
915
896
|
|
|
916
897
|
tmp_dir = RAW_FRAME_DIR
|
|
@@ -920,22 +901,25 @@ class VideoImportService():
|
|
|
920
901
|
cleaned_filename = f"cleaned_{video_filename}"
|
|
921
902
|
cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
|
|
922
903
|
|
|
904
|
+
processor_roi, endoscope_roi = self._get_processor_roi_info(video)
|
|
905
|
+
|
|
906
|
+
# Processor roi can be used later to OCR preknown regions.
|
|
907
|
+
|
|
923
908
|
# Clean video with ROI masking (heavy I/O operation)
|
|
924
909
|
actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
|
|
925
|
-
Path(raw_video_path),
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
cleaned_video_path
|
|
910
|
+
video_path=Path(raw_video_path),
|
|
911
|
+
video_file_obj=video,
|
|
912
|
+
device_name=device_name,
|
|
913
|
+
endoscope_roi=endoscope_roi,
|
|
914
|
+
output_path=cleaned_video_path,
|
|
915
|
+
technique="mask_overlay"
|
|
932
916
|
)
|
|
933
917
|
|
|
934
918
|
# Optional: enrich metadata using TrOCR+LLM on one random extracted frame
|
|
935
919
|
try:
|
|
936
920
|
# Prefer frames belonging to this video (UUID in path), else pick any frame
|
|
937
921
|
frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
|
|
938
|
-
video_uuid = str(
|
|
922
|
+
video_uuid = str(video.uuid)
|
|
939
923
|
filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
|
|
940
924
|
if filtered:
|
|
941
925
|
sample_frame = random.choice(filtered)
|
|
@@ -973,10 +957,13 @@ class VideoImportService():
|
|
|
973
957
|
SAFETY MECHANISM: Only updates fields that are empty, default values, or explicitly marked as safe to overwrite.
|
|
974
958
|
This prevents accidentally overwriting valuable manually entered or previously extracted data.
|
|
975
959
|
"""
|
|
976
|
-
|
|
960
|
+
video = self._require_current_video()
|
|
961
|
+
sensitive_meta = getattr(video, "sensitive_meta", None)
|
|
962
|
+
|
|
963
|
+
if not (sensitive_meta and extracted_metadata):
|
|
977
964
|
return
|
|
978
|
-
|
|
979
|
-
sm =
|
|
965
|
+
|
|
966
|
+
sm = sensitive_meta
|
|
980
967
|
updated_fields = []
|
|
981
968
|
|
|
982
969
|
# Map extracted metadata to SensitiveMeta fields
|
|
@@ -1006,48 +993,71 @@ class VideoImportService():
|
|
|
1006
993
|
|
|
1007
994
|
# Enhanced safety check: Only update if current value is safe to overwrite
|
|
1008
995
|
if new_value and (old_value in SAFE_TO_OVERWRITE_VALUES):
|
|
1009
|
-
self.logger.info(
|
|
996
|
+
self.logger.info(
|
|
997
|
+
"Updating %s from '%s' to '%s' for video %s",
|
|
998
|
+
sm_field,
|
|
999
|
+
old_value,
|
|
1000
|
+
new_value,
|
|
1001
|
+
video.uuid,
|
|
1002
|
+
)
|
|
1010
1003
|
setattr(sm, sm_field, new_value)
|
|
1011
1004
|
updated_fields.append(sm_field)
|
|
1012
1005
|
elif new_value and old_value and old_value not in SAFE_TO_OVERWRITE_VALUES:
|
|
1013
|
-
self.logger.info(
|
|
1006
|
+
self.logger.info(
|
|
1007
|
+
"Preserving existing %s value '%s' (not overwriting with '%s') for video %s",
|
|
1008
|
+
sm_field,
|
|
1009
|
+
old_value,
|
|
1010
|
+
new_value,
|
|
1011
|
+
video.uuid,
|
|
1012
|
+
)
|
|
1014
1013
|
|
|
1015
1014
|
if updated_fields:
|
|
1016
1015
|
sm.save(update_fields=updated_fields)
|
|
1017
|
-
self.logger.info(
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
self.logger.info(
|
|
1016
|
+
self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
|
|
1017
|
+
|
|
1018
|
+
state = video.get_or_create_state()
|
|
1019
|
+
state.mark_sensitive_meta_processed(save=True)
|
|
1020
|
+
self.logger.info("Marked sensitive metadata as processed for video %s", video.uuid)
|
|
1022
1021
|
else:
|
|
1023
|
-
self.logger.info(
|
|
1022
|
+
self.logger.info("No SensitiveMeta fields updated for video %s - all existing values preserved", video.uuid)
|
|
1024
1023
|
|
|
1025
1024
|
def _signal_completion(self):
|
|
1026
1025
|
"""Signal completion to the tracking system."""
|
|
1027
1026
|
try:
|
|
1027
|
+
video = self._require_current_video()
|
|
1028
|
+
|
|
1029
|
+
raw_field: FieldFile | None = getattr(video, "raw_file", None)
|
|
1030
|
+
raw_exists = False
|
|
1031
|
+
if raw_field and getattr(raw_field, "path", None):
|
|
1032
|
+
try:
|
|
1033
|
+
raw_exists = Path(raw_field.path).exists()
|
|
1034
|
+
except (ValueError, OSError):
|
|
1035
|
+
raw_exists = False
|
|
1036
|
+
|
|
1028
1037
|
video_processing_complete = (
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
hasattr(self.current_video.raw_file, 'path') and
|
|
1033
|
-
Path(self.current_video.raw_file.path).exists()
|
|
1038
|
+
video.sensitive_meta is not None and
|
|
1039
|
+
video.video_meta is not None and
|
|
1040
|
+
raw_exists
|
|
1034
1041
|
)
|
|
1035
|
-
|
|
1042
|
+
|
|
1036
1043
|
if video_processing_complete:
|
|
1037
|
-
self.logger.info(
|
|
1038
|
-
|
|
1044
|
+
self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
|
|
1045
|
+
|
|
1039
1046
|
# Update completion flags if they exist
|
|
1040
1047
|
completion_fields = []
|
|
1041
1048
|
for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
|
|
1042
|
-
if hasattr(
|
|
1043
|
-
setattr(
|
|
1049
|
+
if hasattr(video, field_name):
|
|
1050
|
+
setattr(video, field_name, True)
|
|
1044
1051
|
completion_fields.append(field_name)
|
|
1045
1052
|
|
|
1046
1053
|
if completion_fields:
|
|
1047
|
-
|
|
1048
|
-
self.logger.info(
|
|
1054
|
+
video.save(update_fields=completion_fields)
|
|
1055
|
+
self.logger.info("Updated completion flags: %s", completion_fields)
|
|
1049
1056
|
else:
|
|
1050
|
-
self.logger.warning(
|
|
1057
|
+
self.logger.warning(
|
|
1058
|
+
"Video %s processing incomplete - missing required components",
|
|
1059
|
+
video.uuid,
|
|
1060
|
+
)
|
|
1051
1061
|
|
|
1052
1062
|
except Exception as e:
|
|
1053
1063
|
self.logger.warning(f"Failed to signal completion status: {e}")
|
|
@@ -1090,6 +1100,9 @@ class VideoImportService():
|
|
|
1090
1100
|
self.processed_files.remove(file_path_str)
|
|
1091
1101
|
self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
|
|
1092
1102
|
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
|
|
1093
1106
|
except Exception as e:
|
|
1094
1107
|
self.logger.warning(f"Error during context cleanup: {e}")
|
|
1095
1108
|
finally:
|
|
@@ -1104,7 +1117,7 @@ def import_and_anonymize(
|
|
|
1104
1117
|
processor_name: str,
|
|
1105
1118
|
save_video: bool = True,
|
|
1106
1119
|
delete_source: bool = False,
|
|
1107
|
-
) ->
|
|
1120
|
+
) -> VideoFile | None:
|
|
1108
1121
|
"""Module-level helper that instantiates VideoImportService and runs import_and_anonymize.
|
|
1109
1122
|
Kept for backward compatibility with callers that import this function directly.
|
|
1110
1123
|
"""
|