endoreg-db 0.8.3.3__py3-none-any.whl → 0.8.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +23 -1
- endoreg_db/data/setup_config.yaml +38 -0
- endoreg_db/management/commands/create_model_meta_from_huggingface.py +1 -2
- endoreg_db/management/commands/load_ai_model_data.py +18 -15
- endoreg_db/management/commands/setup_endoreg_db.py +218 -33
- endoreg_db/models/media/pdf/raw_pdf.py +241 -97
- endoreg_db/models/media/video/pipe_1.py +30 -33
- endoreg_db/models/media/video/video_file.py +300 -187
- endoreg_db/models/medical/hardware/endoscopy_processor.py +10 -1
- endoreg_db/models/metadata/model_meta_logic.py +34 -45
- endoreg_db/models/metadata/sensitive_meta_logic.py +555 -150
- endoreg_db/serializers/__init__.py +26 -55
- endoreg_db/serializers/misc/__init__.py +1 -1
- endoreg_db/serializers/misc/file_overview.py +65 -35
- endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
- endoreg_db/serializers/video_examination.py +198 -0
- endoreg_db/services/lookup_service.py +228 -58
- endoreg_db/services/lookup_store.py +174 -30
- endoreg_db/services/pdf_import.py +585 -282
- endoreg_db/services/video_import.py +493 -240
- endoreg_db/urls/__init__.py +36 -23
- endoreg_db/urls/label_video_segments.py +2 -0
- endoreg_db/urls/media.py +103 -66
- endoreg_db/utils/setup_config.py +177 -0
- endoreg_db/views/__init__.py +5 -3
- endoreg_db/views/media/pdf_media.py +3 -1
- endoreg_db/views/media/video_media.py +1 -1
- endoreg_db/views/media/video_segments.py +187 -259
- endoreg_db/views/pdf/__init__.py +5 -8
- endoreg_db/views/pdf/pdf_stream.py +186 -0
- endoreg_db/views/pdf/reimport.py +110 -94
- endoreg_db/views/requirement/lookup.py +171 -287
- endoreg_db/views/video/__init__.py +0 -2
- endoreg_db/views/video/video_examination_viewset.py +202 -289
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/METADATA +1 -2
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/RECORD +38 -37
- endoreg_db/views/pdf/pdf_media.py +0 -239
- endoreg_db/views/pdf/pdf_stream_views.py +0 -127
- endoreg_db/views/video/video_media.py +0 -158
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/licenses/LICENSE +0 -0
endoreg_db/services/video_import.py

@@ -8,73 +8,96 @@ Changelog:
 October 14, 2025: Added file locking mechanism to prevent race conditions
 during concurrent video imports (matches PDF import pattern)
 """

 import logging
-import sys
 import os
+import random
 import shutil
+import sys
 import time
 from contextlib import contextmanager
+from datetime import date
 from pathlib import Path
-from typing import
+from typing import Any, Dict, List, Optional, Tuple, Union
+
 from django.db import transaction
-from endoreg_db.models import VideoFile, SensitiveMeta
-from endoreg_db.utils.paths import STORAGE_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
-import random
-from endoreg_db.utils.hashs import get_video_hash
-from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
 from django.db.models.fields.files import FieldFile
-from
+from lx_anonymizer import FrameCleaner
+from moviepy import video
+
+from endoreg_db.models import EndoscopyProcessor, SensitiveMeta, VideoFile
+from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
+from endoreg_db.utils.hashs import get_video_hash
+from endoreg_db.utils.paths import ANONYM_VIDEO_DIR, STORAGE_DIR, VIDEO_DIR

 # File lock configuration (matches PDF import)
 STALE_LOCK_SECONDS = 6000  # 100 minutes - reclaim locks older than this
-MAX_LOCK_WAIT_SECONDS =
+MAX_LOCK_WAIT_SECONDS = (
+    90  # New: wait up to 90s for a non-stale lock to clear before skipping
+)

 logger = logging.getLogger(__name__)


-class VideoImportService
+class VideoImportService:
     """
     Service for importing and anonymizing video files.
     Uses a central video instance pattern for cleaner state management.

     Features (October 14, 2025):
     - File locking to prevent concurrent processing of the same video
     - Stale lock detection and reclamation (600s timeout)
     - Hash-based duplicate detection
     - Graceful fallback processing without lx_anonymizer
     """

     def __init__(self, project_root: Optional[Path] = None):
         # Set up project root path
         if project_root:
             self.project_root = Path(project_root)
         else:
             self.project_root = Path(__file__).parent.parent.parent.parent

         # Track processed files to prevent duplicates
-
-
+        try:
+            # Ensure anonym_video directory exists before listing files
+            anonym_video_dir = Path(ANONYM_VIDEO_DIR)
+            if anonym_video_dir.exists():
+                self.processed_files = set(
+                    str(anonym_video_dir / file)
+                    for file in os.listdir(ANONYM_VIDEO_DIR)
+                )
+            else:
+                logger.info(f"Creating anonym_videos directory: {anonym_video_dir}")
+                anonym_video_dir.mkdir(parents=True, exist_ok=True)
+                self.processed_files = set()
+        except Exception as e:
+            logger.warning(f"Failed to initialize processed files tracking: {e}")
+            self.processed_files = set()

         # Central video instance and processing context
         self.current_video: Optional[VideoFile] = None
         self.processing_context: Dict[str, Any] = {}

         self.delete_source = True

         self.logger = logging.getLogger(__name__)

+        self.cleaner = (
+            None  # This gets instantiated in the perform_frame_cleaning method
+        )
+
     def _require_current_video(self) -> VideoFile:
         """Return the current VideoFile or raise if it has not been initialized."""
         if self.current_video is None:
             raise RuntimeError("Current video instance is not set")
         return self.current_video

     @contextmanager
     def _file_lock(self, path: Path):
         """
         Create a file lock to prevent duplicate processing of the same video.

         This context manager creates a .lock file alongside the video file.
         If the lock file already exists, it checks if it's stale (older than
         STALE_LOCK_SECONDS) and reclaims it if necessary. If it's not stale,

@@ -98,24 +121,27 @@ class VideoImportService():
             except FileNotFoundError:
                 # Race: lock removed between exists and stat; retry acquire in next loop
                 age = None

             if age is not None and age > STALE_LOCK_SECONDS:
                 try:
                     logger.warning(
                         "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
-                        path,
+                        path,
+                        age,
                     )
                     lock_path.unlink()
                 except Exception as e:
-                    logger.warning(
+                    logger.warning(
+                        "Failed to remove stale lock %s: %s", lock_path, e
+                    )
                 # Loop continues and retries acquire immediately
                 continue

             # Not stale: wait until deadline, then give up gracefully
             if time.time() >= deadline:
                 raise ValueError(f"File already being processed: {path}")
             time.sleep(1.0)

         os.write(fd, b"lock")
         os.close(fd)
         fd = None

@@ -128,11 +154,11 @@ class VideoImportService():
                 lock_path.unlink()
             except OSError:
                 pass

     def processed(self) -> bool:
         """Indicates if the current file has already been processed."""
-        return getattr(self,
+        return getattr(self, "_processed", False)

     def import_and_anonymize(
         self,
         file_path: Union[Path, str],

@@ -145,11 +171,15 @@ class VideoImportService():
         High-level helper that orchestrates the complete video import and anonymization process.
         Uses the central video instance pattern for improved state management.
         """
+        # DEFENSIVE: Initialize processing_context immediately to prevent KeyError crashes
+        self.processing_context = {"file_path": Path(file_path)}
+
         try:
             # Initialize processing context
-            self._initialize_processing_context(
+            self._initialize_processing_context(
+                file_path, center_name, processor_name, save_video, delete_source
+            )
+
             # Validate and prepare file (may raise ValueError if another worker holds a non-stale lock)
             try:
                 self._validate_and_prepare_file()

@@ -159,115 +189,130 @@ class VideoImportService():
                     self.logger.info(f"Skipping {file_path}: {ve}")
                     return None
                 raise

             # Create or retrieve video instance
             self._create_or_retrieve_video_instance()

             # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
             self._create_sensitive_file()

             # Setup processing environment
             self._setup_processing_environment()

             # Process frames and metadata
             self._process_frames_and_metadata()

             # Finalize processing
             self._finalize_processing()

             # Move files and cleanup
             self._cleanup_and_archive()

             return self.current_video

         except Exception as e:
+            # Safe file path access - handles cases where processing_context wasn't initialized
+            safe_file_path = getattr(self, "processing_context", {}).get(
+                "file_path", file_path
+            )
+            # Debug: Log context state for troubleshooting
+            context_keys = list(getattr(self, "processing_context", {}).keys())
+            self.logger.debug(f"Context keys during error: {context_keys}")
+            self.logger.error(
+                f"Video import and anonymization failed for {safe_file_path}: {e}"
+            )
             self._cleanup_on_error()
             raise
         finally:
             self._cleanup_processing_context()

-    def _initialize_processing_context(
+    def _initialize_processing_context(
+        self,
+        file_path: Union[Path, str],
+        center_name: str,
+        processor_name: str,
+        save_video: bool,
+        delete_source: bool,
+    ):
         """Initialize the processing context for the current video import."""
         self.processing_context = {
+            "file_path": Path(file_path),
+            "center_name": center_name,
+            "processor_name": processor_name,
+            "save_video": save_video,
+            "delete_source": delete_source,
+            "processing_started": False,
+            "frames_extracted": False,
+            "anonymization_completed": False,
+            "error_reason": None,
         }

         self.logger.info(f"Initialized processing context for: {file_path}")

     def _validate_and_prepare_file(self):
         """
         Validate the video file and prepare for processing.

         Uses file locking to prevent concurrent processing of the same video file.
         This prevents race conditions where multiple workers might try to process
         the same video simultaneously.

         The lock is acquired here and held for the entire import process.
         See _file_lock() for lock reclamation logic.
         """
-        file_path = self.processing_context[
+        file_path = self.processing_context["file_path"]
+
         # Acquire file lock to prevent concurrent processing
         # Lock will be held until finally block in import_and_anonymize()
         try:
-            self.processing_context[
-            self.processing_context[
+            self.processing_context["_lock_context"] = self._file_lock(file_path)
+            self.processing_context["_lock_context"].__enter__()
         except Exception:
             self._cleanup_processing_context()
             raise

         self.logger.info("Acquired file lock for: %s", file_path)

         # Check if already processed (memory-based check)
         if str(file_path) in self.processed_files:
             self.logger.info("File %s already processed, skipping", file_path)
             self._processed = True
             raise ValueError(f"File already processed: {file_path}")

         # Check file exists
         if not file_path.exists():
             raise FileNotFoundError(f"Video file not found: {file_path}")

         self.logger.info("File validation completed for: %s", file_path)

     def _create_or_retrieve_video_instance(self):
         """Create or retrieve the VideoFile instance and move to final storage."""

         self.logger.info("Creating VideoFile instance...")

         self.current_video = VideoFile.create_from_file_initialized(
-            file_path=self.processing_context[
-            center_name=self.processing_context[
-            processor_name=self.processing_context[
-            delete_source=self.processing_context[
-            save_video_file=self.processing_context[
+            file_path=self.processing_context["file_path"],
+            center_name=self.processing_context["center_name"],
+            processor_name=self.processing_context["processor_name"],
+            delete_source=self.processing_context["delete_source"],
+            save_video_file=self.processing_context["save_video"],
         )

         if not self.current_video:
             raise RuntimeError("Failed to create VideoFile instance")

         # Immediately move to final storage locations
         self._move_to_final_storage()

         self.logger.info("Created VideoFile with UUID: %s", self.current_video.uuid)

         # Get and mark processing state
         state = VideoFile.get_or_create_state(self.current_video)
         if not state:
             raise RuntimeError("Failed to create VideoFile state")

         state.mark_processing_started(save=True)
-        self.processing_context[
+        self.processing_context["processing_started"] = True

     def _move_to_final_storage(self):
         """
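The hunks above describe the sidecar-lock pattern only in prose and truncated fragments. Below is a minimal, self-contained sketch of that pattern (atomic O_EXCL create, stale-lock reclamation, bounded wait); it is an illustration under assumptions, not the package's actual `_file_lock` implementation, and the names `file_lock` and the `.lock` suffix convention are taken from the surrounding diff.

```python
import os
import time
from contextlib import contextmanager
from pathlib import Path

STALE_LOCK_SECONDS = 6000   # reclaim locks older than this
MAX_LOCK_WAIT_SECONDS = 90  # wait this long for a live lock before giving up


@contextmanager
def file_lock(path: Path):
    """Illustrative sidecar lock: <video>.lock guards one import at a time."""
    lock_path = path.with_suffix(path.suffix + ".lock")
    deadline = time.time() + MAX_LOCK_WAIT_SECONDS
    fd = None
    try:
        while True:
            try:
                # Atomic create: fails if another worker already holds the lock
                fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
                break
            except FileExistsError:
                try:
                    age = time.time() - lock_path.stat().st_mtime
                except FileNotFoundError:
                    continue  # lock vanished between open() and stat(); retry
                if age > STALE_LOCK_SECONDS:
                    lock_path.unlink(missing_ok=True)  # reclaim stale lock
                    continue
                if time.time() >= deadline:
                    raise ValueError(f"File already being processed: {path}")
                time.sleep(1.0)
        os.write(fd, b"lock")
        os.close(fd)
        fd = None
        yield
    finally:
        if fd is not None:
            os.close(fd)
        lock_path.unlink(missing_ok=True)
```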
@@ -301,12 +346,23 @@ class VideoImportService():
         except Exception:
             stored_raw_path = None

-        # Fallback: derive from UUID + suffix
+        # Fallback: derive from UUID + suffix - ALWAYS use UUID for consistency
         if not stored_raw_path:
             suffix = source_path.suffix or ".mp4"
             uuid_str = getattr(_current_video, "uuid", None)
-
+            if uuid_str:
+                filename = f"{uuid_str}{suffix}"
+            else:
+                # Emergency fallback with timestamp to avoid conflicts
+                import time
+
+                timestamp = int(time.time())
+                filename = f"video_{timestamp}{suffix}"
+                self.logger.warning(
+                    "No UUID available, using timestamp-based filename: %s", filename
+                )
             stored_raw_path = videos_dir / filename
+            self.logger.debug("Using UUID-based raw filename: %s", filename)

         delete_source = bool(self.processing_context.get("delete_source", True))
         stored_raw_path.parent.mkdir(parents=True, exist_ok=True)

@@ -321,7 +377,9 @@ class VideoImportService():
             except Exception:
                 shutil.copy2(source_path, stored_raw_path)
                 os.remove(source_path)
-                self.logger.info(
+                self.logger.info(
+                    "Copied & removed raw video to: %s", stored_raw_path
+                )
         else:
             shutil.copy2(source_path, stored_raw_path)
             self.logger.info("Copied raw video to: %s", stored_raw_path)

@@ -344,7 +402,6 @@ class VideoImportService():
         self.processing_context["raw_video_path"] = stored_raw_path
         self.processing_context["video_filename"] = stored_raw_path.name

-
     def _setup_processing_environment(self):
         """Setup the processing environment without file movement."""
         video = self._require_current_video()

@@ -352,71 +409,96 @@ class VideoImportService():
         # Initialize video specifications
         video.initialize_video_specs()

         # Extract frames BEFORE processing to prevent pipeline 1 conflicts
         self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
         try:
             frames_extracted = video.extract_frames(overwrite=False)
             if frames_extracted:
-                self.processing_context[
+                self.processing_context["frames_extracted"] = True
                 self.logger.info("Frame extraction completed successfully")
+                # Initialize frame objects in database
+                video.initialize_frames(video.get_frame_paths())
+
                 # CRITICAL: Immediately save the frames_extracted state to database
                 # to prevent refresh_from_db() in pipeline 1 from overriding it
                 state = video.get_or_create_state()
                 if not state.frames_extracted:
                     state.frames_extracted = True
-                    state.save(update_fields=[
+                    state.save(update_fields=["frames_extracted"])
                     self.logger.info("Persisted frames_extracted=True to database")
             else:
                 self.logger.warning("Frame extraction failed, but continuing...")
-                self.processing_context[
+                self.processing_context["frames_extracted"] = False
         except Exception as e:
-            self.logger.warning(
+            self.logger.warning(
+                f"Frame extraction failed during setup: {e}, but continuing..."
+            )
+            self.processing_context["frames_extracted"] = False

         # Ensure default patient data
         self._ensure_default_patient_data(video_instance=video)

         self.logger.info("Processing environment setup completed")

     def _process_frames_and_metadata(self):
         """Process frames and extract metadata with anonymization."""
         # Check frame cleaning availability
-        frame_cleaning_available, frame_cleaner
+        frame_cleaning_available, frame_cleaner = (
+            self._ensure_frame_cleaning_available()
+        )
         video = self._require_current_video()

         raw_file_field = video.raw_file
-        has_raw_file = isinstance(raw_file_field, FieldFile) and bool(
+        has_raw_file = isinstance(raw_file_field, FieldFile) and bool(
+            raw_file_field.name
+        )

         if not (frame_cleaning_available and has_raw_file):
-            self.logger.warning(
+            self.logger.warning(
+                "Frame cleaning not available or conditions not met, using fallback anonymization."
+            )
             self._fallback_anonymize_video()
             return

         try:
-            self.logger.info(
+            self.logger.info(
+                "Starting frame-level anonymization with processor ROI masking..."
+            )
+
             # Get processor ROI information
-            endoscope_data_roi_nested, endoscope_image_roi =
+            endoscope_data_roi_nested, endoscope_image_roi = (
+                self._get_processor_roi_info()
+            )
+
             # Perform frame cleaning with timeout to prevent blocking
-            from concurrent.futures import ThreadPoolExecutor
+            from concurrent.futures import ThreadPoolExecutor
+            from concurrent.futures import TimeoutError as FutureTimeoutError
+
             with ThreadPoolExecutor(max_workers=1) as executor:
-                future = executor.submit(
+                future = executor.submit(
+                    self._perform_frame_cleaning,
+                    endoscope_data_roi_nested,
+                    endoscope_image_roi,
+                )
                 try:
                     # Increased timeout to better accommodate ffmpeg + OCR
-                    future.result(timeout=
-                    self.processing_context[
-                    self.logger.info(
+                    future.result(timeout=50000)
+                    self.processing_context["anonymization_completed"] = True
+                    self.logger.info(
+                        "Frame cleaning completed successfully within timeout"
+                    )
                 except FutureTimeoutError:
-                    self.logger.warning(
+                    self.logger.warning(
+                        "Frame cleaning timed out; entering grace period check for cleaned output"
+                    )
                     # Grace period: detect if cleaned file appears shortly after timeout
-                    raw_video_path = self.processing_context.get(
-                    video_filename = self.processing_context.get(
+                    raw_video_path = self.processing_context.get("raw_video_path")
+                    video_filename = self.processing_context.get(
+                        "video_filename",
+                        Path(raw_video_path).name if raw_video_path else "video.mp4",
+                    )
                     grace_seconds = 60
                     expected_cleaned_path: Optional[Path] = None
                     processed_field = video.processed_file

@@ -429,46 +511,68 @@ class VideoImportService():
                     if expected_cleaned_path is not None:
                         for _ in range(grace_seconds):
                             if expected_cleaned_path.exists():
-                                self.processing_context[
+                                self.processing_context["cleaned_video_path"] = (
+                                    expected_cleaned_path
+                                )
+                                self.processing_context["anonymization_completed"] = (
+                                    True
+                                )
+                                self.logger.info(
+                                    "Detected cleaned video during grace period: %s",
+                                    expected_cleaned_path,
+                                )
                                 found = True
                                 break
                             time.sleep(1)
                     else:
                         self._fallback_anonymize_video()
                     if not found:
-                        raise TimeoutError(
+                        raise TimeoutError(
+                            "Frame cleaning operation timed out - likely Ollama connection issue"
+                        )

         except Exception as e:
-            self.logger.warning(
+            self.logger.warning(
+                "Frame cleaning failed (reason: %s), falling back to simple copy", e
+            )
             # Try fallback anonymization when frame cleaning fails
             try:
                 self._fallback_anonymize_video()
             except Exception as fallback_error:
-                self.logger.error(
+                self.logger.error(
+                    "Fallback anonymization also failed: %s", fallback_error
+                )
                 # If even fallback fails, mark as not anonymized but continue import
-                self.processing_context[
-                self.processing_context[
+                self.processing_context["anonymization_completed"] = False
+                self.processing_context["error_reason"] = (
+                    f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
+                )

     def _save_anonymized_video(self):
         original_raw_file_path_to_delete = None
         original_raw_frame_dir_to_delete = None
         video = self._require_current_video()
         anonymized_video_path = video.get_target_anonymized_video_path()

         if not anonymized_video_path.exists():
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}"
+            )

         new_processed_hash = get_video_hash(anonymized_video_path)
-        if
+        if (
+            video.__class__.objects.filter(processed_video_hash=new_processed_hash)
+            .exclude(pk=video.pk)
+            .exists()
+        ):
             raise ValueError(
                 f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
             )

         video.processed_video_hash = new_processed_hash
-        video.processed_file.name = anonymized_video_path.relative_to(
+        video.processed_file.name = anonymized_video_path.relative_to(
+            STORAGE_DIR
+        ).as_posix()

         update_fields = [
             "processed_video_hash",
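The hunk above submits frame cleaning to a single-worker thread pool, waits with a timeout, and then polls briefly for the cleaned file before giving up. A condensed sketch of that timeout-plus-grace-period pattern follows; the helper name, timeout default, and standalone form are assumptions for illustration, not the service's actual API.

```python
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from pathlib import Path


def run_with_grace_period(clean_fn, expected_output: Path,
                          timeout_s: float = 300, grace_s: int = 60) -> bool:
    """Run a blocking cleaning job; after a timeout, poll briefly for its output."""
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(clean_fn)
        try:
            future.result(timeout=timeout_s)
            return True
        except FutureTimeoutError:
            # The job may still finish writing its output shortly after the timeout.
            for _ in range(grace_s):
                if expected_output.exists():
                    return True
                time.sleep(1)
            # Note: leaving the `with` block still waits for the worker thread,
            # mirroring how the service keeps the executor scoped to the attempt.
            return False
```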
@@ -484,11 +588,13 @@ class VideoImportService():

         update_fields.extend(["raw_file", "video_hash"])

-        transaction.on_commit(
+        transaction.on_commit(
+            lambda: _cleanup_raw_assets(
+                video_uuid=video.uuid,
+                raw_file_path=original_raw_file_path_to_delete,
+                raw_frame_dir=original_raw_frame_dir_to_delete,
+            )
+        )

         video.save(update_fields=update_fields)
         video.state.mark_anonymized(save=True)

@@ -504,60 +610,75 @@ class VideoImportService():
         self.logger.info("Attempting fallback video anonymization...")
         video = self.current_video
         if video is None:
-            self.logger.warning(
+            self.logger.warning(
+                "No VideoFile instance available for fallback anonymization"
+            )

             # Strategy 2: Simple copy (no processing, just copy raw to processed)
-            self.logger.info(
-            self.
+            self.logger.info(
+                "Using simple copy fallback (raw video will be used as 'processed' video)"
+            )
+            self.processing_context["anonymization_completed"] = False
+            self.processing_context["use_raw_as_processed"] = True
+            self.logger.warning(
+                "Fallback: Video will be imported without anonymization (raw copy used)"
+            )
         except Exception as e:
-            self.logger.error(
+            self.logger.error(
+                f"Error during fallback anonymization: {e}", exc_info=True
+            )
+            self.processing_context["anonymization_completed"] = False
+            self.processing_context["error_reason"] = str(e)

     def _finalize_processing(self):
         """Finalize processing and update video state."""
         self.logger.info("Updating video processing state...")

         with transaction.atomic():
             video = self._require_current_video()
             try:
                 video.refresh_from_db()
             except Exception as refresh_error:
-                self.logger.warning(
+                self.logger.warning(
+                    "Could not refresh VideoFile %s from DB: %s",
+                    video.uuid,
+                    refresh_error,
+                )

             state = video.get_or_create_state()

             # Only mark frames as extracted if they were successfully extracted
-            if self.processing_context.get(
+            if self.processing_context.get("frames_extracted", False):
                 state.frames_extracted = True
                 self.logger.info("Marked frames as extracted in state")
             else:
                 self.logger.warning("Frames were not extracted, not updating state")

             # Always mark these as true (metadata extraction attempts were made)
             state.frames_initialized = True
             state.video_meta_extracted = True
             state.text_meta_extracted = True

             # ✅ FIX: Only mark as processed if anonymization actually completed
-            anonymization_completed = self.processing_context.get(
+            anonymization_completed = self.processing_context.get(
+                "anonymization_completed", False
+            )
             if anonymization_completed:
                 state.mark_sensitive_meta_processed(save=False)
-                self.logger.info(
+                self.logger.info(
+                    "Anonymization completed - marking sensitive meta as processed"
+                )
             else:
                 self.logger.warning(
-                    "Anonymization NOT completed - NOT marking as processed. "
-                    f"Reason: {self.processing_context.get('error_reason', 'Unknown')}"
+                    f"Anonymization NOT completed - NOT marking as processed. Reason: {self.processing_context.get('error_reason', 'Unknown')}"
                 )
                 # Explicitly mark as NOT processed
                 state.sensitive_meta_processed = False

             # Save all state changes
             state.save()
             self.logger.info("Video processing state updated")

             # Signal completion
             self._signal_completion()
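The first hunk above defers deletion of raw assets with `transaction.on_commit`, so raw files are only removed once the database save has actually committed. A small sketch of that design choice is shown below; the wrapper function name is hypothetical, while `transaction.on_commit` and `_cleanup_raw_assets` are taken from the diff itself.

```python
from django.db import transaction

from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets


def save_processed_video(video, update_fields, raw_file_path, raw_frame_dir):
    """Persist the processed video and defer raw-asset deletion until commit."""
    with transaction.atomic():
        # The callback runs only if the surrounding transaction commits,
        # so raw files are never deleted for a rolled-back save.
        transaction.on_commit(
            lambda: _cleanup_raw_assets(
                video_uuid=video.uuid,
                raw_file_path=raw_file_path,
                raw_frame_dir=raw_frame_dir,
            )
        )
        video.save(update_fields=update_fields)
```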
@@ -571,17 +692,20 @@ class VideoImportService():
|
|
|
571
692
|
video = self._require_current_video()
|
|
572
693
|
|
|
573
694
|
processed_video_path = None
|
|
574
|
-
if
|
|
575
|
-
processed_video_path = self.processing_context[
|
|
695
|
+
if "cleaned_video_path" in self.processing_context:
|
|
696
|
+
processed_video_path = self.processing_context["cleaned_video_path"]
|
|
576
697
|
else:
|
|
577
|
-
raw_video_path = self.processing_context.get(
|
|
698
|
+
raw_video_path = self.processing_context.get("raw_video_path")
|
|
578
699
|
if raw_video_path and Path(raw_video_path).exists():
|
|
579
|
-
|
|
580
|
-
|
|
700
|
+
# Use UUID-based naming to avoid conflicts
|
|
701
|
+
suffix = Path(raw_video_path).suffix or ".mp4"
|
|
702
|
+
processed_filename = f"processed_{video.uuid}{suffix}"
|
|
581
703
|
processed_video_path = Path(raw_video_path).parent / processed_filename
|
|
582
704
|
try:
|
|
583
705
|
shutil.copy2(str(raw_video_path), str(processed_video_path))
|
|
584
|
-
self.logger.info(
|
|
706
|
+
self.logger.info(
|
|
707
|
+
"Copied raw video for processing: %s", processed_video_path
|
|
708
|
+
)
|
|
585
709
|
except Exception as exc:
|
|
586
710
|
self.logger.error("Failed to copy raw video: %s", exc)
|
|
587
711
|
processed_video_path = None
|
|
@@ -601,62 +725,86 @@ class VideoImportService():
|
|
|
601
725
|
relative_path = anonym_target_path.relative_to(storage_root)
|
|
602
726
|
video.processed_file.name = str(relative_path)
|
|
603
727
|
video.save(update_fields=["processed_file"])
|
|
604
|
-
self.logger.info(
|
|
728
|
+
self.logger.info(
|
|
729
|
+
"Updated processed_file path to: %s", relative_path
|
|
730
|
+
)
|
|
605
731
|
except Exception as exc:
|
|
606
|
-
self.logger.error(
|
|
607
|
-
|
|
608
|
-
|
|
732
|
+
self.logger.error(
|
|
733
|
+
"Failed to update processed_file path: %s", exc
|
|
734
|
+
)
|
|
735
|
+
video.processed_file.name = (
|
|
736
|
+
f"anonym_videos/{anonym_video_filename}"
|
|
737
|
+
)
|
|
738
|
+
video.save(update_fields=["processed_file"])
|
|
609
739
|
self.logger.info(
|
|
610
740
|
"Updated processed_file path using fallback: %s",
|
|
611
741
|
f"anonym_videos/{anonym_video_filename}",
|
|
612
742
|
)
|
|
613
743
|
|
|
614
|
-
self.processing_context[
|
|
744
|
+
self.processing_context["anonymization_completed"] = True
|
|
615
745
|
else:
|
|
616
|
-
self.logger.warning(
|
|
746
|
+
self.logger.warning(
|
|
747
|
+
"Processed video file not found after move: %s",
|
|
748
|
+
anonym_target_path,
|
|
749
|
+
)
|
|
617
750
|
except Exception as exc:
|
|
618
|
-
self.logger.error(
|
|
751
|
+
self.logger.error(
|
|
752
|
+
"Failed to move processed video to anonym_videos: %s", exc
|
|
753
|
+
)
|
|
619
754
|
else:
|
|
620
|
-
self.logger.warning(
|
|
755
|
+
self.logger.warning(
|
|
756
|
+
"No processed video available - processed_file will remain empty"
|
|
757
|
+
)
|
|
621
758
|
|
|
622
759
|
try:
|
|
623
760
|
from endoreg_db.utils.paths import RAW_FRAME_DIR
|
|
761
|
+
|
|
624
762
|
shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
|
|
625
|
-
self.logger.debug(
|
|
763
|
+
self.logger.debug(
|
|
764
|
+
"Cleaned up temporary frames directory: %s", RAW_FRAME_DIR
|
|
765
|
+
)
|
|
626
766
|
except Exception as exc:
|
|
627
767
|
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
|
|
628
768
|
|
|
629
|
-
source_path = self.processing_context[
|
|
630
|
-
if self.processing_context[
|
|
769
|
+
source_path = self.processing_context["file_path"]
|
|
770
|
+
if self.processing_context["delete_source"] and Path(source_path).exists():
|
|
631
771
|
try:
|
|
632
772
|
os.remove(source_path)
|
|
633
773
|
self.logger.info("Removed remaining source file: %s", source_path)
|
|
634
774
|
except Exception as exc:
|
|
635
|
-
self.logger.warning(
|
|
775
|
+
self.logger.warning(
|
|
776
|
+
"Failed to remove source file %s: %s", source_path, exc
|
|
777
|
+
)
|
|
636
778
|
|
|
637
779
|
if not video.processed_file or not Path(video.processed_file.path).exists():
|
|
638
|
-
self.logger.warning(
|
|
780
|
+
self.logger.warning(
|
|
781
|
+
"No processed_file found after cleanup - video will be unprocessed"
|
|
782
|
+
)
|
|
639
783
|
try:
|
|
640
784
|
video.anonymize(delete_original_raw=self.delete_source)
|
|
641
|
-
video.save(update_fields=[
|
|
785
|
+
video.save(update_fields=["processed_file"])
|
|
642
786
|
self.logger.info("Late-stage anonymization succeeded")
|
|
643
787
|
except Exception as e:
|
|
644
788
|
self.logger.error("Late-stage anonymization failed: %s", e)
|
|
645
|
-
self.processing_context[
|
|
789
|
+
self.processing_context["anonymization_completed"] = False
|
|
646
790
|
|
|
647
791
|
self.logger.info("Cleanup and archiving completed")
|
|
648
792
|
|
|
649
|
-
self.processed_files.add(str(self.processing_context[
|
|
793
|
+
self.processed_files.add(str(self.processing_context["file_path"]))
|
|
650
794
|
|
|
651
795
|
with transaction.atomic():
|
|
652
796
|
video.refresh_from_db()
|
|
653
|
-
if hasattr(video,
|
|
797
|
+
if hasattr(video, "state") and self.processing_context.get(
|
|
798
|
+
"anonymization_completed"
|
|
799
|
+
):
|
|
654
800
|
video.state.mark_sensitive_meta_processed(save=True)
|
|
655
801
|
|
|
656
|
-
self.logger.info(
|
|
802
|
+
self.logger.info(
|
|
803
|
+
"Import and anonymization completed for VideoFile UUID: %s", video.uuid
|
|
804
|
+
)
|
|
657
805
|
self.logger.info("Raw video stored in: /data/videos")
|
|
658
806
|
self.logger.info("Processed video stored in: /data/anonym_videos")
|
|
659
|
-
|
|
807
|
+
|
|
660
808
|
def _create_sensitive_file(
|
|
661
809
|
self,
|
|
662
810
|
video_instance: VideoFile | None = None,
|
|
@@ -680,7 +828,9 @@ class VideoImportService():
|
|
|
680
828
|
if source_path is None:
|
|
681
829
|
raise ValueError("No file path available for creating sensitive file")
|
|
682
830
|
if not raw_field:
|
|
683
|
-
raise ValueError(
|
|
831
|
+
raise ValueError(
|
|
832
|
+
"VideoFile must have a raw_file to create a sensitive file"
|
|
833
|
+
)
|
|
684
834
|
|
|
685
835
|
target_dir = VIDEO_DIR / "sensitive"
|
|
686
836
|
if not target_dir.exists():
|
|
@@ -690,9 +840,13 @@ class VideoImportService():
|
|
|
690
840
|
target_file_path = target_dir / source_path.name
|
|
691
841
|
try:
|
|
692
842
|
shutil.move(str(source_path), str(target_file_path))
|
|
693
|
-
self.logger.info(
|
|
843
|
+
self.logger.info(
|
|
844
|
+
"Moved raw file to sensitive directory: %s", target_file_path
|
|
845
|
+
)
|
|
694
846
|
except Exception as exc:
|
|
695
|
-
self.logger.warning(
|
|
847
|
+
self.logger.warning(
|
|
848
|
+
"Failed to move raw file to sensitive dir, copying instead: %s", exc
|
|
849
|
+
)
|
|
696
850
|
shutil.copy(str(source_path), str(target_file_path))
|
|
697
851
|
try:
|
|
698
852
|
os.remove(source_path)
|
|
@@ -706,7 +860,10 @@ class VideoImportService():
|
|
|
706
860
|
relative_path = target_file_path.relative_to(storage_root)
|
|
707
861
|
video.raw_file.name = str(relative_path)
|
|
708
862
|
video.save(update_fields=["raw_file"])
|
|
709
|
-
self.logger.info(
|
|
863
|
+
self.logger.info(
|
|
864
|
+
"Updated video.raw_file to point to sensitive location: %s",
|
|
865
|
+
relative_path,
|
|
866
|
+
)
|
|
710
867
|
except Exception as exc:
|
|
711
868
|
self.logger.warning("Failed to set relative path, using fallback: %s", exc)
|
|
712
869
|
video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
|
|
@@ -716,10 +873,17 @@ class VideoImportService():
|
|
|
716
873
|
target_file_path.name,
|
|
717
874
|
)
|
|
718
875
|
|
|
719
|
-
self.
|
|
876
|
+
self.processing_context["raw_video_path"] = target_file_path
|
|
877
|
+
self.processing_context["video_filename"] = target_file_path.name
|
|
878
|
+
|
|
879
|
+
self.logger.info(
|
|
880
|
+
"Created sensitive file for %s at %s", video.uuid, target_file_path
|
|
881
|
+
)
|
|
720
882
|
return target_file_path
|
|
721
883
|
|
|
722
|
-
def _get_processor_roi_info(
|
|
884
|
+
def _get_processor_roi_info(
|
|
885
|
+
self,
|
|
886
|
+
) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
|
|
723
887
|
"""Get processor ROI information for masking."""
|
|
724
888
|
endoscope_data_roi_nested = None
|
|
725
889
|
endoscope_image_roi = None
|
|
@@ -730,10 +894,15 @@ class VideoImportService():
|
|
|
730
894
|
video_meta = getattr(video, "video_meta", None)
|
|
731
895
|
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
732
896
|
if processor:
|
|
733
|
-
assert isinstance(processor, EndoscopyProcessor),
|
|
897
|
+
assert isinstance(processor, EndoscopyProcessor), (
|
|
898
|
+
"Processor is not of type EndoscopyProcessor"
|
|
899
|
+
)
|
|
734
900
|
endoscope_image_roi = processor.get_roi_endoscope_image()
|
|
735
|
-
endoscope_data_roi_nested = processor.
|
|
736
|
-
self.logger.info(
|
|
901
|
+
endoscope_data_roi_nested = processor.get_sensitive_rois()
|
|
902
|
+
self.logger.info(
|
|
903
|
+
"Retrieved processor ROI information: endoscope_image_roi=%s",
|
|
904
|
+
endoscope_image_roi,
|
|
905
|
+
)
|
|
737
906
|
else:
|
|
738
907
|
self.logger.warning(
|
|
739
908
|
"No processor found for video %s, proceeding without ROI masking",
|
|
@@ -755,28 +924,40 @@ class VideoImportService():
|
|
|
755
924
|
|
|
756
925
|
return endoscope_data_roi_nested, endoscope_image_roi
|
|
757
926
|
|
|
758
|
-
def _ensure_default_patient_data(
|
|
927
|
+
def _ensure_default_patient_data(
|
|
928
|
+
self, video_instance: VideoFile | None = None
|
|
929
|
+
) -> None:
|
|
759
930
|
"""Ensure minimum patient data is present on the video's SensitiveMeta."""
|
|
760
931
|
|
|
761
932
|
video = video_instance or self._require_current_video()
|
|
762
933
|
|
|
763
934
|
sensitive_meta = getattr(video, "sensitive_meta", None)
|
|
764
935
|
if not sensitive_meta:
|
|
765
|
-
self.logger.info(
|
|
936
|
+
self.logger.info(
|
|
937
|
+
"No SensitiveMeta found for video %s, creating default", video.uuid
|
|
938
|
+
)
|
|
766
939
|
default_data = {
|
|
767
940
|
"patient_first_name": "Patient",
|
|
768
941
|
"patient_last_name": "Unknown",
|
|
769
942
|
"patient_dob": date(1990, 1, 1),
|
|
770
943
|
"examination_date": date.today(),
|
|
771
|
-
"center_name": video.center.name
|
|
944
|
+
"center_name": video.center.name
|
|
945
|
+
if video.center
|
|
946
|
+
else "university_hospital_wuerzburg",
|
|
772
947
|
}
|
|
773
948
|
try:
|
|
774
949
|
sensitive_meta = SensitiveMeta.create_from_dict(default_data)
|
|
775
950
|
video.sensitive_meta = sensitive_meta
|
|
776
951
|
video.save(update_fields=["sensitive_meta"])
|
|
777
|
-
self.logger.info(
|
|
952
|
+
self.logger.info(
|
|
953
|
+
"Created default SensitiveMeta for video %s", video.uuid
|
|
954
|
+
)
|
|
778
955
|
except Exception as exc:
|
|
779
|
-
self.logger.error(
|
|
956
|
+
self.logger.error(
|
|
957
|
+
"Failed to create default SensitiveMeta for video %s: %s",
|
|
958
|
+
video.uuid,
|
|
959
|
+
exc,
|
|
960
|
+
)
|
|
780
961
|
return
|
|
781
962
|
else:
|
|
782
963
|
update_data: Dict[str, Any] = {}
|
|
@@ -800,14 +981,16 @@ class VideoImportService():
|
|
|
800
981
|
list(update_data.keys()),
|
|
801
982
|
)
|
|
802
983
|
except Exception as exc:
|
|
803
|
-
self.logger.error(
|
|
804
|
-
|
|
805
|
-
|
|
984
|
+
self.logger.error(
|
|
985
|
+
"Failed to update SensitiveMeta for video %s: %s",
|
|
986
|
+
video.uuid,
|
|
987
|
+
exc,
|
|
988
|
+
)
|
|
806
989
|
|
|
807
990
|
def _ensure_frame_cleaning_available(self):
|
|
808
991
|
"""
|
|
809
992
|
Ensure frame cleaning modules are available by adding lx-anonymizer to path.
|
|
810
|
-
|
|
993
|
+
|
|
811
994
|
Returns:
|
|
812
995
|
Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
|
|
813
996
|
"""
|
|
@@ -816,14 +999,14 @@ class VideoImportService():
|
|
|
816
999
|
from lx_anonymizer import FrameCleaner # type: ignore[import]
|
|
817
1000
|
|
|
818
1001
|
if FrameCleaner:
|
|
819
|
-
return True, FrameCleaner
|
|
820
|
-
|
|
1002
|
+
return True, FrameCleaner()
|
|
1003
|
+
|
|
821
1004
|
except Exception as e:
|
|
822
|
-
self.logger.warning(
|
|
823
|
-
|
|
824
|
-
|
|
1005
|
+
self.logger.warning(
|
|
1006
|
+
f"Frame cleaning not available: {e} Please install or update lx_anonymizer."
|
|
1007
|
+
)
|
|
825
1008
|
|
|
826
|
-
|
|
1009
|
+
return False, None
|
|
827
1010
|
|
|
828
1011
|
def _perform_frame_cleaning(self, endoscope_data_roi_nested, endoscope_image_roi):
|
|
829
1012
|
"""Perform frame cleaning and anonymization."""
|
|
@@ -834,37 +1017,49 @@ class VideoImportService():
             raise RuntimeError("Frame cleaning not available")

         # Prepare parameters for frame cleaning
-        raw_video_path = self.processing_context.get(
+        raw_video_path = self.processing_context.get("raw_video_path")
+
         if not raw_video_path or not Path(raw_video_path).exists():
-
+            try:
+                self.current_video = self._require_current_video()
+                raw_video_path = self.current_video.get_raw_file_path()
+            except Exception:
+                raise RuntimeError(f"Raw video path not found: {raw_video_path}")

-
+        # Create temporary output path for cleaned video using UUID to avoid naming conflicts
+        video = self._require_current_video()
+        # Ensure raw_video_path is not None
+        if not raw_video_path:
+            raise RuntimeError(
+                "raw_video_path is None, cannot construct cleaned_video_path"
+            )
+        suffix = Path(raw_video_path).suffix or ".mp4"
+        cleaned_filename = f"cleaned_{video.uuid}{suffix}"
         cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
+        self.logger.debug("Using UUID-based cleaned filename: %s", cleaned_filename)

         # Clean video with ROI masking (heavy I/O operation)
         actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
             video_path=Path(raw_video_path),
             endoscope_image_roi=endoscope_image_roi,
             endoscope_data_roi_nested=endoscope_data_roi_nested,
             output_path=cleaned_video_path,
-            technique="mask_overlay"
+            technique="mask_overlay",
         )

         # Store cleaned video path for later use in _cleanup_and_archive
-        self.processing_context[
-        self.processing_context[
+        self.processing_context["cleaned_video_path"] = actual_cleaned_path
+        self.processing_context["extracted_metadata"] = extracted_metadata

         # Update sensitive metadata with extracted information
         self._update_sensitive_metadata(extracted_metadata)
-        self.logger.info(
+        self.logger.info(
+            f"Extracted metadata from frame cleaning: {extracted_metadata}"
+        )
+
+        self.logger.info(
+            f"Frame cleaning with ROI masking completed: {actual_cleaned_path}"
+        )
         self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")

     def _update_sensitive_metadata(self, extracted_metadata: Dict[str, Any]):

@@ -881,22 +1076,67 @@ class VideoImportService():

         sm = sensitive_meta
         updated_fields = []
+
+        # Ensure center is set from video.center if not in extracted_metadata
+        metadata_to_update = extracted_metadata.copy()
+
+        # FIX: Set center object instead of center_name string
+        if not hasattr(sm, "center") or not sm.center:
+            if video.center:
+                metadata_to_update["center"] = video.center
+                self.logger.debug(
+                    "Added center object '%s' to metadata for SensitiveMeta update",
+                    video.center.name,
+                )
+            else:
+                center_name = metadata_to_update.get("center_name")
+                if center_name:
+                    try:
+                        from ..models.administration import Center
+
+                        center_obj = Center.objects.get(name=center_name)
+                        metadata_to_update["center"] = center_obj
+                        self.logger.debug(
+                            "Loaded center object '%s' from center_name", center_name
+                        )
+                        metadata_to_update.pop("center_name", None)
+                    except Center.DoesNotExist:
+                        self.logger.error(
+                            "Center '%s' not found in database", center_name
+                        )
+                        return
+
         try:
-            sm.update_from_dict(
-            updated_fields = list(
+            sm.update_from_dict(metadata_to_update)
+            updated_fields = list(
+                extracted_metadata.keys()
+            )  # Only log originally extracted fields
         except KeyError as e:
             self.logger.warning(f"Failed to update SensitiveMeta field {e}")
+            return
+
         if updated_fields:
-
-
+            try:
+                sm.save()  # Remove update_fields to allow all necessary fields to be saved
+                self.logger.info(
+                    "Updated SensitiveMeta fields for video %s: %s",
+                    video.uuid,
+                    updated_fields,
+                )

+                state = video.get_or_create_state()
+                state.mark_sensitive_meta_processed(save=True)
+                self.logger.info(
+                    "Marked sensitive metadata as processed for video %s", video.uuid
+                )
+            except Exception as e:
+                self.logger.error(f"Failed to save SensitiveMeta: {e}")
+                raise  # Re-raise to trigger fallback in calling method
         else:
-            self.logger.info(
+            self.logger.info(
+                "No SensitiveMeta fields updated for video %s - all existing values preserved",
+                video.uuid,
+            )

     def _signal_completion(self):
         """Signal completion to the tracking system."""
@@ -912,21 +1152,28 @@ class VideoImportService():
|
|
|
912
1152
|
raw_exists = False
|
|
913
1153
|
|
|
914
1154
|
video_processing_complete = (
|
|
915
|
-
video.sensitive_meta is not None
|
|
916
|
-
video.video_meta is not None
|
|
917
|
-
raw_exists
|
|
1155
|
+
video.sensitive_meta is not None
|
|
1156
|
+
and video.video_meta is not None
|
|
1157
|
+
and raw_exists
|
|
918
1158
|
)
|
|
919
1159
|
|
|
920
1160
|
if video_processing_complete:
|
|
921
|
-
self.logger.info(
|
|
1161
|
+
self.logger.info(
|
|
1162
|
+
"Video %s processing completed successfully - ready for validation",
|
|
1163
|
+
video.uuid,
|
|
1164
|
+
)
|
|
922
1165
|
|
|
923
1166
|
# Update completion flags if they exist
|
|
924
1167
|
completion_fields = []
|
|
925
|
-
for field_name in [
|
|
1168
|
+
for field_name in [
|
|
1169
|
+
"import_completed",
|
|
1170
|
+
"processing_complete",
|
|
1171
|
+
"ready_for_validation",
|
|
1172
|
+
]:
|
|
926
1173
|
if hasattr(video, field_name):
|
|
927
1174
|
setattr(video, field_name, True)
|
|
928
1175
|
completion_fields.append(field_name)
|
|
929
|
-
|
|
1176
|
+
|
|
930
1177
|
if completion_fields:
|
|
931
1178
|
video.save(update_fields=completion_fields)
|
|
932
1179
|
self.logger.info("Updated completion flags: %s", completion_fields)
|
|
@@ -935,15 +1182,15 @@ class VideoImportService():
|
|
|
935
1182
|
"Video %s processing incomplete - missing required components",
|
|
936
1183
|
video.uuid,
|
|
937
1184
|
)
|
|
938
|
-
|
|
1185
|
+
|
|
939
1186
|
except Exception as e:
|
|
940
1187
|
self.logger.warning(f"Failed to signal completion status: {e}")
|
|
941
1188
|
|
|
942
1189
|
def _cleanup_on_error(self):
|
|
943
1190
|
"""Cleanup processing context on error."""
|
|
944
|
-
if self.current_video and hasattr(self.current_video,
|
|
1191
|
+
if self.current_video and hasattr(self.current_video, "state"):
|
|
945
1192
|
try:
|
|
946
|
-
if self.processing_context.get(
|
|
1193
|
+
if self.processing_context.get("processing_started"):
|
|
947
1194
|
self.current_video.state.frames_extracted = False
|
|
948
1195
|
self.current_video.state.frames_initialized = False
|
|
949
1196
|
self.current_video.state.video_meta_extracted = False
|
|
@@ -955,29 +1202,34 @@ class VideoImportService():
|
|
|
955
1202
|
def _cleanup_processing_context(self):
|
|
956
1203
|
"""
|
|
957
1204
|
Cleanup processing context and release file lock.
|
|
958
|
-
|
|
1205
|
+
|
|
959
1206
|
This method is always called in the finally block of import_and_anonymize()
|
|
960
1207
|
to ensure the file lock is released even if processing fails.
|
|
961
1208
|
"""
|
|
1209
|
+
# DEFENSIVE: Ensure processing_context exists before accessing it
|
|
1210
|
+
if not hasattr(self, "processing_context"):
|
|
1211
|
+
self.processing_context = {}
|
|
1212
|
+
|
|
962
1213
|
try:
|
|
963
1214
|
# Release file lock if it was acquired
|
|
964
|
-
lock_context = self.processing_context.get(
|
|
1215
|
+
lock_context = self.processing_context.get("_lock_context")
|
|
965
1216
|
if lock_context is not None:
|
|
966
1217
|
try:
|
|
967
1218
|
lock_context.__exit__(None, None, None)
|
|
968
1219
|
self.logger.info("Released file lock")
|
|
969
1220
|
except Exception as e:
|
|
970
1221
|
self.logger.warning(f"Error releasing file lock: {e}")
|
|
971
|
-
|
|
1222
|
+
|
|
972
1223
|
# Remove file from processed set if processing failed
|
|
973
|
-
file_path = self.processing_context.get(
|
|
974
|
-
if file_path and not self.processing_context.get(
|
|
1224
|
+
file_path = self.processing_context.get("file_path")
|
|
1225
|
+
if file_path and not self.processing_context.get("anonymization_completed"):
|
|
975
1226
|
file_path_str = str(file_path)
|
|
976
1227
|
if file_path_str in self.processed_files:
|
|
977
1228
|
self.processed_files.remove(file_path_str)
|
|
978
|
-
self.logger.info(
|
|
979
|
-
|
|
980
|
-
|
|
1229
|
+
self.logger.info(
|
|
1230
|
+
f"Removed {file_path_str} from processed files (failed processing)"
|
|
1231
|
+
)
|
|
1232
|
+
|
|
981
1233
|
except Exception as e:
|
|
982
1234
|
self.logger.warning(f"Error during context cleanup: {e}")
|
|
983
1235
|
finally:
|
|
@@ -985,6 +1237,7 @@ class VideoImportService():
|
|
|
985
1237
|
self.current_video = None
|
|
986
1238
|
self.processing_context = {}
|
|
987
1239
|
|
|
1240
|
+
|
|
988
1241
|
# Convenience function for callers/tests that expect a module-level import_and_anonymize
|
|
989
1242
|
def import_and_anonymize(
|
|
990
1243
|
file_path,
|
|
@@ -1003,4 +1256,4 @@ def import_and_anonymize(
         processor_name=processor_name,
         save_video=save_video,
         delete_source=delete_source,
-    )
+    )