endoreg-db 0.8.2.8__py3-none-any.whl → 0.8.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/models/media/video/pipe_1.py +13 -4
- endoreg_db/models/metadata/model_meta.py +14 -1
- endoreg_db/models/metadata/model_meta_logic.py +88 -1
- endoreg_db/services/video_import.py +200 -213
- {endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/METADATA +3 -2
- {endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/RECORD +8 -8
- {endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.2.8.dist-info → endoreg_db-0.8.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -74,11 +74,20 @@ def _pipe_1(
|
|
|
74
74
|
except AiModel.DoesNotExist:
|
|
75
75
|
logger.error(f"Pipe 1 failed: Model '{model_name}' not found.")
|
|
76
76
|
return False
|
|
77
|
+
|
|
77
78
|
except ModelMeta.DoesNotExist:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
try:
|
|
80
|
+
model_name = download_segmentation_model()
|
|
81
|
+
ai_model_obj = AiModel.objects.get(name=model_name)
|
|
82
|
+
if model_meta_version is not None:
|
|
83
|
+
model_meta = ai_model_obj.metadata_versions.get(version=model_meta_version)
|
|
84
|
+
else:
|
|
85
|
+
model_meta = ModelMeta.setup_default_from_huggingface()
|
|
86
|
+
except ModelMeta.DoesNotExist:
|
|
87
|
+
logger.error(
|
|
88
|
+
f"Pipe 1 failed: ModelMeta version {model_meta_version} for model '{model_name}' not found."
|
|
89
|
+
)
|
|
90
|
+
return False
|
|
82
91
|
try:
|
|
83
92
|
sequences: Optional[Dict[str, List[Tuple[int, int]]]] = video_file.predict_video(
|
|
84
93
|
model_meta=model_meta,
|
|
@@ -18,7 +18,6 @@ from . import model_meta_logic as logic
|
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
20
|
from endoreg_db.models import LabelSet, AiModel # pylint: disable=import-outside-toplevel
|
|
21
|
-
from torch.nn import Module as TorchModule
|
|
22
21
|
|
|
23
22
|
|
|
24
23
|
class ModelMetaManager(models.Manager):
|
|
@@ -128,6 +127,20 @@ class ModelMeta(models.Model):
|
|
|
128
127
|
cls, meta_name, model_name, labelset_name, weights_file,
|
|
129
128
|
requested_version, bump_if_exists, **kwargs
|
|
130
129
|
)
|
|
130
|
+
|
|
131
|
+
@classmethod
def setup_default_from_huggingface(
    cls: Type["ModelMeta"],
    model_id: str = "wg-lux/colo_segmentation_RegNetX800MF_base",
    labelset_name: Optional[str] = None,
) -> "ModelMeta":
    """
    Build a ModelMeta from a pretrained Hugging Face model.

    This is a thin forwarding wrapper: all work is delegated to
    ``logic.setup_default_from_huggingface_logic``.

    Args:
        model_id: Hugging Face repository id of the model to use.
        labelset_name: Name of the label set to attach. ``None`` is
            forwarded unchanged; the logic layer decides the default.

    Returns:
        The value produced by the logic helper (annotated as ModelMeta).
    """
    return logic.setup_default_from_huggingface_logic(
        cls,
        model_id=model_id,
        labelset_name=labelset_name,
    )
|
|
142
|
+
|
|
143
|
+
|
|
131
144
|
|
|
132
145
|
@classmethod
|
|
133
146
|
def get_latest_version_number(cls: Type["ModelMeta"], meta_name: str, model_name: str) -> int:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import shutil
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Optional, TYPE_CHECKING, Any, Type
|
|
4
|
-
|
|
4
|
+
from huggingface_hub import hf_hub_download
|
|
5
5
|
from django.db import transaction
|
|
6
6
|
|
|
7
7
|
# Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
|
|
@@ -234,3 +234,90 @@ def get_model_meta_by_name_version_logic(
|
|
|
234
234
|
raise cls.DoesNotExist(
|
|
235
235
|
f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
|
|
236
236
|
)
|
|
237
|
+
|
|
238
|
+
from huggingface_hub import model_info
|
|
239
|
+
import re
|
|
240
|
+
|
|
241
|
+
def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
    """
    Infer default model metadata for a Hugging Face model.

    The repository's tags are fetched with ``model_info`` and used to pick
    an activation function and a name. Normalization statistics and input
    size are never inferred: the ColoReg segmentation defaults are used on
    every path.

    Args:
        model_id: Hugging Face repository id, e.g. ``"org/model-name"``.

    Returns:
        A dict with the keys ``name``, ``activation``, ``mean``, ``std``,
        ``size_x``, ``size_y`` and ``description``.
    """
    # ColoReg segmentation defaults, shared by the fallback and the
    # inferred result so the two paths cannot drift apart.
    defaults: dict[str, Any] = {
        "activation": "sigmoid",
        "mean": (0.45211223, 0.27139644, 0.19264949),
        "std": (0.31418097, 0.21088019, 0.16059452),
        "size_x": 716,
        "size_y": 716,
    }

    # model_info() reports an unknown repository or a network problem by
    # raising, not by returning a falsy value. Without this guard the
    # fallback below could never run. The lookup is best-effort, so any
    # failure is logged and treated as "no info available".
    try:
        info = model_info(model_id)
    except Exception as exc:  # noqa: BLE001 - best-effort metadata lookup
        logger.warning("model_info lookup failed for %s: %s", model_id, exc)
        info = None

    if not info:
        logger.info(f"Could not retrieve model info for {model_id}, using ColoReg segmentation defaults.")
        return {
            "name": "wg-lux/colo_segmentation_RegNetX800MF_base",
            **defaults,
            "description": f"Defaults for unknown model {model_id}",
        }

    tags = info.tags or []

    # First tag carrying the respective prefix, or None when absent.
    architecture = next((t for t in tags if t.startswith("architecture:")), None)
    task = next((t for t in tags if t.startswith("task:")), None)

    # Task-based inference: segmentation/detection take precedence and keep
    # sigmoid; a pure classification task switches to softmax.
    activation = defaults["activation"]
    if task:
        if "segmentation" in task or "detection" in task:
            activation = "sigmoid"
        elif "classification" in task:
            activation = "softmax"

    # Architecture-based name: prefer the explicit tag, otherwise derive a
    # slug from the last path component of the repository id.
    if architecture:
        arch = architecture.replace("architecture:", "")
    else:
        repo_name = model_id.split("/")[-1].lower()
        arch = re.sub(r"[^a-z0-9]+", "_", repo_name)

    return {
        "name": arch,
        **defaults,
        "activation": activation,
        "description": f"Inferred defaults for {model_id}",
    }
|
|
298
|
+
|
|
299
|
+
def setup_default_from_huggingface_logic(cls, model_id: str, labelset_name: str | None = None):
    """
    Download model weights from Hugging Face and create a ModelMeta for them.

    Args:
        cls: The ModelMeta class, forwarded to ``create_from_file_logic``.
        model_id: Hugging Face repository id to download
            ``pytorch_model.bin`` from.
        labelset_name: Name of the LabelSet to use. When empty or ``None``
            the first LabelSet returned by the database is used.

    Returns:
        The result of ``create_from_file_logic``.

    Raises:
        LabelSet.DoesNotExist: If ``labelset_name`` matches no LabelSet, or
            if no name was given and the LabelSet table is empty.
    """
    # Resolve the label set first so that a missing label set fails fast,
    # before any network download or AiModel row creation happens.
    if labelset_name:
        labelset = LabelSet.objects.get(name=labelset_name)
    else:
        labelset = LabelSet.objects.first()
        if labelset is None:
            # QuerySet.first() returns None on an empty table; without this
            # check the code would crash later on ``labelset.name``.
            raise LabelSet.DoesNotExist(
                "No LabelSet available to use as default; create one or pass labelset_name."
            )

    meta = infer_default_model_meta_from_hf(model_id)

    # Download weights
    weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin", local_dir=WEIGHTS_DIR)

    ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])

    return create_from_file_logic(
        cls,
        meta_name=meta["name"],
        model_name=ai_model.name,
        labelset_name=labelset.name,
        weights_file=weights_path,
        activation=meta["activation"],
        mean=meta["mean"],
        std=meta["std"],
        size_x=meta["size_x"],
        size_y=meta["size_y"],
        description=meta["description"],
    )
|
|
@@ -8,26 +8,24 @@ Changelog:
|
|
|
8
8
|
October 14, 2025: Added file locking mechanism to prevent race conditions
|
|
9
9
|
during concurrent video imports (matches PDF import pattern)
|
|
10
10
|
"""
|
|
11
|
-
|
|
11
|
+
from datetime import date
|
|
12
12
|
import logging
|
|
13
|
+
import sys
|
|
13
14
|
import os
|
|
14
|
-
import random
|
|
15
15
|
import shutil
|
|
16
|
-
import sys
|
|
17
16
|
import time
|
|
18
17
|
from contextlib import contextmanager
|
|
19
|
-
from datetime import date
|
|
20
18
|
from pathlib import Path
|
|
21
|
-
from typing import
|
|
22
|
-
|
|
19
|
+
from typing import Union, Dict, Any, Optional, List, Tuple
|
|
23
20
|
from django.db import transaction
|
|
24
|
-
from
|
|
21
|
+
from endoreg_db.models import VideoFile, SensitiveMeta
|
|
22
|
+
from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
|
|
23
|
+
import random
|
|
25
24
|
from lx_anonymizer.ocr import trocr_full_image_ocr
|
|
26
|
-
|
|
27
|
-
from endoreg_db.models import SensitiveMeta, VideoFile
|
|
28
|
-
from endoreg_db.models.media.video.video_file_anonymize import _anonymize, _cleanup_raw_assets
|
|
29
25
|
from endoreg_db.utils.hashs import get_video_hash
|
|
30
|
-
from endoreg_db.
|
|
26
|
+
from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
|
|
27
|
+
from typing import TYPE_CHECKING
|
|
28
|
+
from django.db.models.fields.files import FieldFile
|
|
31
29
|
|
|
32
30
|
if TYPE_CHECKING:
|
|
33
31
|
from endoreg_db.models import EndoscopyProcessor
|
|
@@ -39,36 +37,37 @@ MAX_LOCK_WAIT_SECONDS = 90 # New: wait up to 90s for a non-stale lock to clear
|
|
|
39
37
|
logger = logging.getLogger(__name__)
|
|
40
38
|
|
|
41
39
|
|
|
42
|
-
class VideoImportService:
|
|
40
|
+
class VideoImportService():
|
|
43
41
|
"""
|
|
44
42
|
Service for importing and anonymizing video files.
|
|
45
43
|
Uses a central video instance pattern for cleaner state management.
|
|
46
|
-
|
|
44
|
+
|
|
47
45
|
Features (October 14, 2025):
|
|
48
46
|
- File locking to prevent concurrent processing of the same video
|
|
49
47
|
- Stale lock detection and reclamation (600s timeout)
|
|
50
48
|
- Hash-based duplicate detection
|
|
51
49
|
- Graceful fallback processing without lx_anonymizer
|
|
52
50
|
"""
|
|
53
|
-
|
|
51
|
+
|
|
54
52
|
def __init__(self, project_root: Optional[Path] = None):
|
|
53
|
+
|
|
55
54
|
# Set up project root path
|
|
56
55
|
if project_root:
|
|
57
56
|
self.project_root = Path(project_root)
|
|
58
57
|
else:
|
|
59
58
|
self.project_root = Path(__file__).parent.parent.parent.parent
|
|
60
|
-
|
|
59
|
+
|
|
61
60
|
# Track processed files to prevent duplicates
|
|
62
61
|
self.processed_files = set(str(file) for file in os.listdir(ANONYM_VIDEO_DIR))
|
|
63
|
-
|
|
62
|
+
|
|
64
63
|
self.STORAGE_DIR = STORAGE_DIR
|
|
65
|
-
|
|
64
|
+
|
|
66
65
|
# Central video instance and processing context
|
|
67
66
|
self.current_video: Optional[VideoFile] = None
|
|
68
67
|
self.processing_context: Dict[str, Any] = {}
|
|
69
|
-
|
|
68
|
+
|
|
70
69
|
self.delete_source = False
|
|
71
|
-
|
|
70
|
+
|
|
72
71
|
self.logger = logging.getLogger(__name__)
|
|
73
72
|
|
|
74
73
|
def _require_current_video(self) -> VideoFile:
|
|
@@ -76,12 +75,12 @@ class VideoImportService:
|
|
|
76
75
|
if self.current_video is None:
|
|
77
76
|
raise RuntimeError("Current video instance is not set")
|
|
78
77
|
return self.current_video
|
|
79
|
-
|
|
78
|
+
|
|
80
79
|
@contextmanager
|
|
81
80
|
def _file_lock(self, path: Path):
|
|
82
81
|
"""
|
|
83
82
|
Create a file lock to prevent duplicate processing of the same video.
|
|
84
|
-
|
|
83
|
+
|
|
85
84
|
This context manager creates a .lock file alongside the video file.
|
|
86
85
|
If the lock file already exists, it checks if it's stale (older than
|
|
87
86
|
STALE_LOCK_SECONDS) and reclaims it if necessary. If it's not stale,
|
|
@@ -105,21 +104,24 @@ class VideoImportService:
|
|
|
105
104
|
except FileNotFoundError:
|
|
106
105
|
# Race: lock removed between exists and stat; retry acquire in next loop
|
|
107
106
|
age = None
|
|
108
|
-
|
|
107
|
+
|
|
109
108
|
if age is not None and age > STALE_LOCK_SECONDS:
|
|
110
109
|
try:
|
|
111
|
-
logger.warning(
|
|
110
|
+
logger.warning(
|
|
111
|
+
"Stale lock detected for %s (age %.0fs). Reclaiming lock...",
|
|
112
|
+
path, age
|
|
113
|
+
)
|
|
112
114
|
lock_path.unlink()
|
|
113
115
|
except Exception as e:
|
|
114
116
|
logger.warning("Failed to remove stale lock %s: %s", lock_path, e)
|
|
115
117
|
# Loop continues and retries acquire immediately
|
|
116
118
|
continue
|
|
117
|
-
|
|
119
|
+
|
|
118
120
|
# Not stale: wait until deadline, then give up gracefully
|
|
119
121
|
if time.time() >= deadline:
|
|
120
122
|
raise ValueError(f"File already being processed: {path}")
|
|
121
123
|
time.sleep(1.0)
|
|
122
|
-
|
|
124
|
+
|
|
123
125
|
os.write(fd, b"lock")
|
|
124
126
|
os.close(fd)
|
|
125
127
|
fd = None
|
|
@@ -132,11 +134,11 @@ class VideoImportService:
|
|
|
132
134
|
lock_path.unlink()
|
|
133
135
|
except OSError:
|
|
134
136
|
pass
|
|
135
|
-
|
|
137
|
+
|
|
136
138
|
def processed(self) -> bool:
|
|
137
139
|
"""Indicates if the current file has already been processed."""
|
|
138
|
-
return getattr(self,
|
|
139
|
-
|
|
140
|
+
return getattr(self, '_processed', False)
|
|
141
|
+
|
|
140
142
|
def import_and_anonymize(
|
|
141
143
|
self,
|
|
142
144
|
file_path: Union[Path, str],
|
|
@@ -151,8 +153,9 @@ class VideoImportService:
|
|
|
151
153
|
"""
|
|
152
154
|
try:
|
|
153
155
|
# Initialize processing context
|
|
154
|
-
self._initialize_processing_context(file_path, center_name, processor_name,
|
|
155
|
-
|
|
156
|
+
self._initialize_processing_context(file_path, center_name, processor_name,
|
|
157
|
+
save_video, delete_source)
|
|
158
|
+
|
|
156
159
|
# Validate and prepare file (may raise ValueError if another worker holds a non-stale lock)
|
|
157
160
|
try:
|
|
158
161
|
self._validate_and_prepare_file()
|
|
@@ -162,24 +165,27 @@ class VideoImportService:
|
|
|
162
165
|
self.logger.info(f"Skipping {file_path}: {ve}")
|
|
163
166
|
return None
|
|
164
167
|
raise
|
|
165
|
-
|
|
166
|
-
# Create
|
|
168
|
+
|
|
169
|
+
# Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
|
|
170
|
+
self._create_sensitive_file()
|
|
171
|
+
|
|
172
|
+
# Create or retrieve video instance
|
|
167
173
|
self._create_or_retrieve_video_instance()
|
|
168
|
-
|
|
174
|
+
|
|
169
175
|
# Setup processing environment
|
|
170
176
|
self._setup_processing_environment()
|
|
171
|
-
|
|
177
|
+
|
|
172
178
|
# Process frames and metadata
|
|
173
179
|
self._process_frames_and_metadata()
|
|
174
|
-
|
|
180
|
+
|
|
175
181
|
# Finalize processing
|
|
176
182
|
self._finalize_processing()
|
|
177
|
-
|
|
183
|
+
|
|
178
184
|
# Move files and cleanup
|
|
179
185
|
self._cleanup_and_archive()
|
|
180
|
-
|
|
186
|
+
|
|
181
187
|
return self.current_video
|
|
182
|
-
|
|
188
|
+
|
|
183
189
|
except Exception as e:
|
|
184
190
|
self.logger.error(f"Video import and anonymization failed for {file_path}: {e}")
|
|
185
191
|
self._cleanup_on_error()
|
|
@@ -187,93 +193,94 @@ class VideoImportService:
|
|
|
187
193
|
finally:
|
|
188
194
|
self._cleanup_processing_context()
|
|
189
195
|
|
|
190
|
-
def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str,
|
|
196
|
+
def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str,
|
|
197
|
+
processor_name: str, save_video: bool, delete_source: bool):
|
|
191
198
|
"""Initialize the processing context for the current video import."""
|
|
192
199
|
self.processing_context = {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
200
|
+
'file_path': Path(file_path),
|
|
201
|
+
'center_name': center_name,
|
|
202
|
+
'processor_name': processor_name,
|
|
203
|
+
'save_video': save_video,
|
|
204
|
+
'delete_source': delete_source,
|
|
205
|
+
'processing_started': False,
|
|
206
|
+
'frames_extracted': False,
|
|
207
|
+
'anonymization_completed': False,
|
|
208
|
+
'error_reason': None
|
|
202
209
|
}
|
|
203
|
-
|
|
210
|
+
|
|
204
211
|
self.logger.info(f"Initialized processing context for: {file_path}")
|
|
205
212
|
|
|
206
213
|
def _validate_and_prepare_file(self):
|
|
207
214
|
"""
|
|
208
215
|
Validate the video file and prepare for processing.
|
|
209
|
-
|
|
216
|
+
|
|
210
217
|
Uses file locking to prevent concurrent processing of the same video file.
|
|
211
218
|
This prevents race conditions where multiple workers might try to process
|
|
212
219
|
the same video simultaneously.
|
|
213
|
-
|
|
220
|
+
|
|
214
221
|
The lock is acquired here and held for the entire import process.
|
|
215
222
|
See _file_lock() for lock reclamation logic.
|
|
216
223
|
"""
|
|
217
|
-
file_path = self.processing_context[
|
|
218
|
-
|
|
224
|
+
file_path = self.processing_context['file_path']
|
|
225
|
+
|
|
219
226
|
# Acquire file lock to prevent concurrent processing
|
|
220
227
|
# Lock will be held until finally block in import_and_anonymize()
|
|
221
|
-
self.processing_context[
|
|
222
|
-
self.processing_context[
|
|
223
|
-
|
|
228
|
+
self.processing_context['_lock_context'] = self._file_lock(file_path)
|
|
229
|
+
self.processing_context['_lock_context'].__enter__()
|
|
230
|
+
|
|
224
231
|
self.logger.info("Acquired file lock for: %s", file_path)
|
|
225
|
-
|
|
232
|
+
|
|
226
233
|
# Check if already processed (memory-based check)
|
|
227
234
|
if str(file_path) in self.processed_files:
|
|
228
235
|
self.logger.info("File %s already processed, skipping", file_path)
|
|
229
236
|
self._processed = True
|
|
230
237
|
raise ValueError(f"File already processed: {file_path}")
|
|
231
|
-
|
|
238
|
+
|
|
232
239
|
# Check file exists
|
|
233
240
|
if not file_path.exists():
|
|
234
241
|
raise FileNotFoundError(f"Video file not found: {file_path}")
|
|
235
|
-
|
|
242
|
+
|
|
236
243
|
self.logger.info("File validation completed for: %s", file_path)
|
|
237
244
|
|
|
238
245
|
def _create_or_retrieve_video_instance(self):
|
|
239
246
|
"""Create or retrieve the VideoFile instance and move to final storage."""
|
|
240
247
|
# Removed duplicate import of VideoFile (already imported at module level)
|
|
241
|
-
|
|
248
|
+
|
|
242
249
|
self.logger.info("Creating VideoFile instance...")
|
|
243
|
-
|
|
250
|
+
|
|
244
251
|
self.current_video = VideoFile.create_from_file_initialized(
|
|
245
|
-
file_path=self.processing_context[
|
|
246
|
-
center_name=self.processing_context[
|
|
247
|
-
processor_name=self.processing_context[
|
|
248
|
-
delete_source=self.processing_context[
|
|
249
|
-
save_video_file=self.processing_context[
|
|
252
|
+
file_path=self.processing_context['file_path'],
|
|
253
|
+
center_name=self.processing_context['center_name'],
|
|
254
|
+
processor_name=self.processing_context['processor_name'],
|
|
255
|
+
delete_source=self.processing_context['delete_source'],
|
|
256
|
+
save_video_file=self.processing_context['save_video'],
|
|
250
257
|
)
|
|
251
|
-
|
|
258
|
+
|
|
252
259
|
if not self.current_video:
|
|
253
260
|
raise RuntimeError("Failed to create VideoFile instance")
|
|
254
|
-
|
|
261
|
+
|
|
255
262
|
# Immediately move to final storage locations
|
|
256
263
|
self._move_to_final_storage()
|
|
257
|
-
|
|
264
|
+
|
|
258
265
|
self.logger.info("Created VideoFile with UUID: %s", self.current_video.uuid)
|
|
259
|
-
|
|
266
|
+
|
|
260
267
|
# Get and mark processing state
|
|
261
268
|
state = VideoFile.get_or_create_state(self.current_video)
|
|
262
269
|
if not state:
|
|
263
270
|
raise RuntimeError("Failed to create VideoFile state")
|
|
264
|
-
|
|
271
|
+
|
|
265
272
|
state.mark_processing_started(save=True)
|
|
266
|
-
self.processing_context[
|
|
273
|
+
self.processing_context['processing_started'] = True
|
|
267
274
|
|
|
268
275
|
def _move_to_final_storage(self):
|
|
269
276
|
"""
|
|
270
277
|
Move video from raw_videos to final storage locations.
|
|
271
|
-
- Raw video → /data/videos (raw_file_path)
|
|
278
|
+
- Raw video → /data/videos (raw_file_path)
|
|
272
279
|
- Processed video will later → /data/anonym_videos (file_path)
|
|
273
280
|
"""
|
|
274
281
|
from endoreg_db.utils import data_paths
|
|
275
|
-
|
|
276
|
-
source_path = self.processing_context[
|
|
282
|
+
|
|
283
|
+
source_path = self.processing_context['file_path']
|
|
277
284
|
|
|
278
285
|
videos_dir = data_paths["video"]
|
|
279
286
|
videos_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -313,7 +320,7 @@ class VideoImportService:
|
|
|
313
320
|
filename = f"{uuid_str}{source_suffix}" if uuid_str else Path(source_path).name
|
|
314
321
|
stored_raw_path = videos_dir / filename
|
|
315
322
|
|
|
316
|
-
delete_source = bool(self.processing_context.get(
|
|
323
|
+
delete_source = bool(self.processing_context.get('delete_source'))
|
|
317
324
|
stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
|
|
318
325
|
|
|
319
326
|
if not stored_raw_path.exists():
|
|
@@ -345,19 +352,19 @@ class VideoImportService:
|
|
|
345
352
|
relative_path = Path(stored_raw_path).relative_to(storage_root)
|
|
346
353
|
if _current_video.raw_file.name != str(relative_path):
|
|
347
354
|
_current_video.raw_file.name = str(relative_path)
|
|
348
|
-
_current_video.save(update_fields=[
|
|
355
|
+
_current_video.save(update_fields=['raw_file'])
|
|
349
356
|
self.logger.info("Updated raw_file path to: %s", relative_path)
|
|
350
357
|
except Exception as e:
|
|
351
358
|
self.logger.error("Failed to ensure raw_file path is relative: %s", e)
|
|
352
359
|
fallback_relative = Path("videos") / Path(stored_raw_path).name
|
|
353
360
|
if _current_video.raw_file.name != fallback_relative.as_posix():
|
|
354
361
|
_current_video.raw_file.name = fallback_relative.as_posix()
|
|
355
|
-
_current_video.save(update_fields=[
|
|
362
|
+
_current_video.save(update_fields=['raw_file'])
|
|
356
363
|
self.logger.info("Updated raw_file path using fallback: %s", fallback_relative.as_posix())
|
|
357
364
|
|
|
358
365
|
# Store paths for later processing
|
|
359
|
-
self.processing_context[
|
|
360
|
-
self.processing_context[
|
|
366
|
+
self.processing_context['raw_video_path'] = Path(stored_raw_path)
|
|
367
|
+
self.processing_context['video_filename'] = Path(stored_raw_path).name
|
|
361
368
|
|
|
362
369
|
def _setup_processing_environment(self):
|
|
363
370
|
"""Setup the processing environment without file movement."""
|
|
@@ -368,32 +375,32 @@ class VideoImportService:
|
|
|
368
375
|
|
|
369
376
|
# Initialize frame objects in database
|
|
370
377
|
video.initialize_frames()
|
|
371
|
-
|
|
378
|
+
|
|
372
379
|
# Extract frames BEFORE processing to prevent pipeline 1 conflicts
|
|
373
380
|
self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
|
|
374
381
|
try:
|
|
375
382
|
frames_extracted = video.extract_frames(overwrite=False)
|
|
376
383
|
if frames_extracted:
|
|
377
|
-
self.processing_context[
|
|
384
|
+
self.processing_context['frames_extracted'] = True
|
|
378
385
|
self.logger.info("Frame extraction completed successfully")
|
|
379
|
-
|
|
386
|
+
|
|
380
387
|
# CRITICAL: Immediately save the frames_extracted state to database
|
|
381
388
|
# to prevent refresh_from_db() in pipeline 1 from overriding it
|
|
382
389
|
state = video.get_or_create_state()
|
|
383
390
|
if not state.frames_extracted:
|
|
384
391
|
state.frames_extracted = True
|
|
385
|
-
state.save(update_fields=[
|
|
392
|
+
state.save(update_fields=['frames_extracted'])
|
|
386
393
|
self.logger.info("Persisted frames_extracted=True to database")
|
|
387
394
|
else:
|
|
388
395
|
self.logger.warning("Frame extraction failed, but continuing...")
|
|
389
|
-
self.processing_context[
|
|
396
|
+
self.processing_context['frames_extracted'] = False
|
|
390
397
|
except Exception as e:
|
|
391
398
|
self.logger.warning(f"Frame extraction failed during setup: {e}, but continuing...")
|
|
392
|
-
self.processing_context[
|
|
393
|
-
|
|
399
|
+
self.processing_context['frames_extracted'] = False
|
|
400
|
+
|
|
394
401
|
# Ensure default patient data
|
|
395
402
|
self._ensure_default_patient_data(video_instance=video)
|
|
396
|
-
|
|
403
|
+
|
|
397
404
|
self.logger.info("Processing environment setup completed")
|
|
398
405
|
|
|
399
406
|
def _process_frames_and_metadata(self):
|
|
@@ -412,24 +419,25 @@ class VideoImportService:
|
|
|
412
419
|
|
|
413
420
|
try:
|
|
414
421
|
self.logger.info("Starting frame-level anonymization with processor ROI masking...")
|
|
415
|
-
|
|
422
|
+
|
|
416
423
|
# Get processor ROI information
|
|
417
424
|
endoscope_data_roi_nested, endoscope_image_roi = self._get_processor_roi_info()
|
|
418
|
-
|
|
425
|
+
|
|
419
426
|
# Perform frame cleaning with timeout to prevent blocking
|
|
420
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
421
|
-
|
|
422
|
-
|
|
427
|
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
|
428
|
+
|
|
423
429
|
with ThreadPoolExecutor(max_workers=1) as executor:
|
|
424
430
|
future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
|
|
425
431
|
try:
|
|
426
432
|
# Increased timeout to better accommodate ffmpeg + OCR
|
|
427
433
|
future.result(timeout=300)
|
|
428
|
-
self.processing_context[
|
|
434
|
+
self.processing_context['anonymization_completed'] = True
|
|
429
435
|
self.logger.info("Frame cleaning completed successfully within timeout")
|
|
430
436
|
except FutureTimeoutError:
|
|
431
437
|
self.logger.warning("Frame cleaning timed out; entering grace period check for cleaned output")
|
|
432
438
|
# Grace period: detect if cleaned file appears shortly after timeout
|
|
439
|
+
raw_video_path = self.processing_context.get('raw_video_path')
|
|
440
|
+
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
|
|
433
441
|
grace_seconds = 60
|
|
434
442
|
expected_cleaned_path: Optional[Path] = None
|
|
435
443
|
processed_field = video.processed_file
|
|
@@ -442,8 +450,8 @@ class VideoImportService:
|
|
|
442
450
|
if expected_cleaned_path is not None:
|
|
443
451
|
for _ in range(grace_seconds):
|
|
444
452
|
if expected_cleaned_path.exists():
|
|
445
|
-
self.processing_context[
|
|
446
|
-
self.processing_context[
|
|
453
|
+
self.processing_context['cleaned_video_path'] = expected_cleaned_path
|
|
454
|
+
self.processing_context['anonymization_completed'] = True
|
|
447
455
|
self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
|
|
448
456
|
found = True
|
|
449
457
|
break
|
|
@@ -461,8 +469,8 @@ class VideoImportService:
|
|
|
461
469
|
except Exception as fallback_error:
|
|
462
470
|
self.logger.error("Fallback anonymization also failed: %s", fallback_error)
|
|
463
471
|
# If even fallback fails, mark as not anonymized but continue import
|
|
464
|
-
self.processing_context[
|
|
465
|
-
self.processing_context[
|
|
472
|
+
self.processing_context['anonymization_completed'] = False
|
|
473
|
+
self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
|
|
466
474
|
|
|
467
475
|
def _save_anonymized_video(self):
|
|
468
476
|
video = self._require_current_video()
|
|
@@ -473,7 +481,9 @@ class VideoImportService:
|
|
|
473
481
|
|
|
474
482
|
new_processed_hash = get_video_hash(anonymized_video_path)
|
|
475
483
|
if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
|
|
476
|
-
raise ValueError(
|
|
484
|
+
raise ValueError(
|
|
485
|
+
f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
|
|
486
|
+
)
|
|
477
487
|
|
|
478
488
|
video.processed_video_hash = new_processed_hash
|
|
479
489
|
video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
|
|
@@ -492,11 +502,11 @@ class VideoImportService:
|
|
|
492
502
|
|
|
493
503
|
update_fields.extend(["raw_file", "video_hash"])
|
|
494
504
|
|
|
495
|
-
transaction.on_commit(
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
)
|
|
505
|
+
transaction.on_commit(lambda: _cleanup_raw_assets(
|
|
506
|
+
video_uuid=video.uuid,
|
|
507
|
+
raw_file_path=original_raw_file_path_to_delete,
|
|
508
|
+
raw_frame_dir=original_raw_frame_dir_to_delete
|
|
509
|
+
))
|
|
500
510
|
|
|
501
511
|
video.save(update_fields=update_fields)
|
|
502
512
|
video.state.mark_anonymized(save=True)
|
|
@@ -513,35 +523,21 @@ class VideoImportService:
|
|
|
513
523
|
video = self.current_video
|
|
514
524
|
if video is None:
|
|
515
525
|
self.logger.warning("No VideoFile instance available for fallback anonymization")
|
|
516
|
-
|
|
517
|
-
# Try VideoFile.pipe_2() method if available
|
|
518
|
-
if hasattr(video, "pipe_2"):
|
|
519
|
-
self.logger.info("Trying VideoFile.pipe_2() method...")
|
|
520
|
-
if video.pipe_2():
|
|
521
|
-
self.logger.info("VideoFile.pipe_2() succeeded")
|
|
522
|
-
self.processing_context["anonymization_completed"] = True
|
|
523
|
-
return
|
|
524
|
-
self.logger.warning("VideoFile.pipe_2() returned False")
|
|
525
|
-
# Try direct anonymization via _anonymize
|
|
526
|
-
if _anonymize(video, delete_original_raw=self.delete_source):
|
|
527
|
-
self.logger.info("VideoFile._anonymize() succeeded")
|
|
528
|
-
self.processing_context["anonymization_completed"] = True
|
|
529
|
-
return
|
|
526
|
+
|
|
530
527
|
|
|
531
528
|
# Strategy 2: Simple copy (no processing, just copy raw to processed)
|
|
532
529
|
self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
|
|
533
|
-
self.processing_context[
|
|
534
|
-
self.processing_context[
|
|
530
|
+
self.processing_context['anonymization_completed'] = False
|
|
531
|
+
self.processing_context['use_raw_as_processed'] = True
|
|
535
532
|
self.logger.warning("Fallback: Video will be imported without anonymization (raw copy used)")
|
|
536
533
|
except Exception as e:
|
|
537
534
|
self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
|
|
538
|
-
self.processing_context[
|
|
539
|
-
self.processing_context[
|
|
540
|
-
|
|
535
|
+
self.processing_context['anonymization_completed'] = False
|
|
536
|
+
self.processing_context['error_reason'] = str(e)
|
|
541
537
|
def _finalize_processing(self):
|
|
542
538
|
"""Finalize processing and update video state."""
|
|
543
539
|
self.logger.info("Updating video processing state...")
|
|
544
|
-
|
|
540
|
+
|
|
545
541
|
with transaction.atomic():
|
|
546
542
|
video = self._require_current_video()
|
|
547
543
|
try:
|
|
@@ -550,33 +546,36 @@ class VideoImportService:
|
|
|
550
546
|
self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
|
|
551
547
|
|
|
552
548
|
state = video.get_or_create_state()
|
|
553
|
-
|
|
549
|
+
|
|
554
550
|
# Only mark frames as extracted if they were successfully extracted
|
|
555
|
-
if self.processing_context.get(
|
|
551
|
+
if self.processing_context.get('frames_extracted', False):
|
|
556
552
|
state.frames_extracted = True
|
|
557
553
|
self.logger.info("Marked frames as extracted in state")
|
|
558
554
|
else:
|
|
559
555
|
self.logger.warning("Frames were not extracted, not updating state")
|
|
560
|
-
|
|
556
|
+
|
|
561
557
|
# Always mark these as true (metadata extraction attempts were made)
|
|
562
558
|
state.frames_initialized = True
|
|
563
559
|
state.video_meta_extracted = True
|
|
564
560
|
state.text_meta_extracted = True
|
|
565
|
-
|
|
561
|
+
|
|
566
562
|
# ✅ FIX: Only mark as processed if anonymization actually completed
|
|
567
|
-
anonymization_completed = self.processing_context.get(
|
|
563
|
+
anonymization_completed = self.processing_context.get('anonymization_completed', False)
|
|
568
564
|
if anonymization_completed:
|
|
569
565
|
state.mark_sensitive_meta_processed(save=False)
|
|
570
566
|
self.logger.info("Anonymization completed - marking sensitive meta as processed")
|
|
571
567
|
else:
|
|
572
|
-
self.logger.warning(
|
|
568
|
+
self.logger.warning(
|
|
569
|
+
"Anonymization NOT completed - NOT marking as processed. "
|
|
570
|
+
f"Reason: {self.processing_context.get('error_reason', 'Unknown')}"
|
|
571
|
+
)
|
|
573
572
|
# Explicitly mark as NOT processed
|
|
574
573
|
state.sensitive_meta_processed = False
|
|
575
|
-
|
|
574
|
+
|
|
576
575
|
# Save all state changes
|
|
577
576
|
state.save()
|
|
578
577
|
self.logger.info("Video processing state updated")
|
|
579
|
-
|
|
578
|
+
|
|
580
579
|
# Signal completion
|
|
581
580
|
self._signal_completion()
|
|
582
581
|
|
|
@@ -590,12 +589,12 @@ class VideoImportService:
|
|
|
590
589
|
video = self._require_current_video()
|
|
591
590
|
|
|
592
591
|
processed_video_path = None
|
|
593
|
-
if
|
|
594
|
-
processed_video_path = self.processing_context[
|
|
592
|
+
if 'cleaned_video_path' in self.processing_context:
|
|
593
|
+
processed_video_path = self.processing_context['cleaned_video_path']
|
|
595
594
|
else:
|
|
596
|
-
raw_video_path = self.processing_context.get(
|
|
595
|
+
raw_video_path = self.processing_context.get('raw_video_path')
|
|
597
596
|
if raw_video_path and Path(raw_video_path).exists():
|
|
598
|
-
video_filename = self.processing_context.get(
|
|
597
|
+
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
599
598
|
processed_filename = f"processed_{video_filename}"
|
|
600
599
|
processed_video_path = Path(raw_video_path).parent / processed_filename
|
|
601
600
|
try:
|
|
@@ -624,13 +623,13 @@ class VideoImportService:
|
|
|
624
623
|
except Exception as exc:
|
|
625
624
|
self.logger.error("Failed to update processed_file path: %s", exc)
|
|
626
625
|
video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
|
|
627
|
-
video.save(update_fields=[
|
|
626
|
+
video.save(update_fields=['processed_file'])
|
|
628
627
|
self.logger.info(
|
|
629
628
|
"Updated processed_file path using fallback: %s",
|
|
630
629
|
f"anonym_videos/{anonym_video_filename}",
|
|
631
630
|
)
|
|
632
631
|
|
|
633
|
-
self.processing_context[
|
|
632
|
+
self.processing_context['anonymization_completed'] = True
|
|
634
633
|
else:
|
|
635
634
|
self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
|
|
636
635
|
except Exception as exc:
|
|
@@ -640,14 +639,13 @@ class VideoImportService:
|
|
|
640
639
|
|
|
641
640
|
try:
|
|
642
641
|
from endoreg_db.utils.paths import RAW_FRAME_DIR
|
|
643
|
-
|
|
644
642
|
shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
|
|
645
643
|
self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
|
|
646
644
|
except Exception as exc:
|
|
647
645
|
self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
|
|
648
646
|
|
|
649
|
-
source_path = self.processing_context[
|
|
650
|
-
if self.processing_context[
|
|
647
|
+
source_path = self.processing_context['file_path']
|
|
648
|
+
if self.processing_context['delete_source'] and Path(source_path).exists():
|
|
651
649
|
try:
|
|
652
650
|
os.remove(source_path)
|
|
653
651
|
self.logger.info("Removed remaining source file: %s", source_path)
|
|
@@ -658,25 +656,25 @@ class VideoImportService:
|
|
|
658
656
|
self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
|
|
659
657
|
try:
|
|
660
658
|
video.anonymize(delete_original_raw=self.delete_source)
|
|
661
|
-
video.save(update_fields=[
|
|
659
|
+
video.save(update_fields=['processed_file'])
|
|
662
660
|
self.logger.info("Late-stage anonymization succeeded")
|
|
663
661
|
except Exception as e:
|
|
664
662
|
self.logger.error("Late-stage anonymization failed: %s", e)
|
|
665
|
-
self.processing_context[
|
|
663
|
+
self.processing_context['anonymization_completed'] = False
|
|
666
664
|
|
|
667
665
|
self.logger.info("Cleanup and archiving completed")
|
|
668
666
|
|
|
669
|
-
self.processed_files.add(str(self.processing_context[
|
|
667
|
+
self.processed_files.add(str(self.processing_context['file_path']))
|
|
670
668
|
|
|
671
669
|
with transaction.atomic():
|
|
672
670
|
video.refresh_from_db()
|
|
673
|
-
if hasattr(video,
|
|
671
|
+
if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
|
|
674
672
|
video.state.mark_sensitive_meta_processed(save=True)
|
|
675
673
|
|
|
676
674
|
self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
|
|
677
675
|
self.logger.info("Raw video stored in: /data/videos")
|
|
678
676
|
self.logger.info("Processed video stored in: /data/anonym_videos")
|
|
679
|
-
|
|
677
|
+
|
|
680
678
|
def _create_sensitive_file(
|
|
681
679
|
self,
|
|
682
680
|
video_instance: VideoFile | None = None,
|
|
@@ -739,7 +737,7 @@ class VideoImportService:
|
|
|
739
737
|
self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
|
|
740
738
|
return target_file_path
|
|
741
739
|
|
|
742
|
-
def _get_processor_roi_info(self) -> Tuple[Optional[Any], Optional[Dict[str, Any]]]:
|
|
740
|
+
def _get_processor_roi_info(self) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
|
|
743
741
|
"""Get processor ROI information for masking."""
|
|
744
742
|
endoscope_data_roi_nested = None
|
|
745
743
|
endoscope_image_roi = None
|
|
@@ -813,67 +811,73 @@ class VideoImportService:
|
|
|
813
811
|
except Exception as exc:
|
|
814
812
|
self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
|
|
815
813
|
|
|
814
|
+
|
|
815
|
+
|
|
816
816
|
def _ensure_frame_cleaning_available(self):
|
|
817
817
|
"""
|
|
818
818
|
Ensure frame cleaning modules are available by adding lx-anonymizer to path.
|
|
819
|
-
|
|
819
|
+
|
|
820
820
|
Returns:
|
|
821
821
|
Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
|
|
822
822
|
"""
|
|
823
823
|
try:
|
|
824
824
|
# Check if we can find the lx-anonymizer directory
|
|
825
825
|
from importlib import resources
|
|
826
|
-
|
|
827
826
|
lx_anonymizer_path = resources.files("lx_anonymizer")
|
|
828
827
|
|
|
829
828
|
# make sure lx_anonymizer_path is a Path object
|
|
830
829
|
lx_anonymizer_path = Path(str(lx_anonymizer_path))
|
|
831
|
-
|
|
830
|
+
|
|
832
831
|
if lx_anonymizer_path.exists():
|
|
833
832
|
# Add to Python path temporarily
|
|
834
833
|
if str(lx_anonymizer_path) not in sys.path:
|
|
835
834
|
sys.path.insert(0, str(lx_anonymizer_path))
|
|
836
|
-
|
|
835
|
+
|
|
837
836
|
# Try simple import
|
|
838
837
|
from lx_anonymizer import FrameCleaner, ReportReader
|
|
839
|
-
|
|
838
|
+
|
|
840
839
|
self.logger.info("Successfully imported lx_anonymizer modules")
|
|
841
|
-
|
|
840
|
+
|
|
842
841
|
# Remove from path to avoid conflicts
|
|
843
842
|
if str(lx_anonymizer_path) in sys.path:
|
|
844
843
|
sys.path.remove(str(lx_anonymizer_path))
|
|
845
|
-
|
|
844
|
+
|
|
846
845
|
return True, FrameCleaner, ReportReader
|
|
847
|
-
|
|
846
|
+
|
|
848
847
|
else:
|
|
849
|
-
self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
|
|
850
|
-
|
|
848
|
+
self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
|
|
849
|
+
|
|
851
850
|
except Exception as e:
|
|
852
851
|
self.logger.warning(f"Frame cleaning not available: {e}")
|
|
853
|
-
|
|
852
|
+
|
|
854
853
|
return False, None, None
|
|
855
854
|
|
|
855
|
+
|
|
856
|
+
|
|
856
857
|
def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
|
|
857
858
|
"""Perform frame cleaning and anonymization."""
|
|
858
859
|
# Instantiate frame cleaner
|
|
859
860
|
frame_cleaner = FrameCleaner()
|
|
860
|
-
|
|
861
|
+
|
|
861
862
|
# Prepare parameters for frame cleaning
|
|
862
|
-
raw_video_path = self.processing_context.get(
|
|
863
|
-
|
|
863
|
+
raw_video_path = self.processing_context.get('raw_video_path')
|
|
864
|
+
|
|
864
865
|
if not raw_video_path or not Path(raw_video_path).exists():
|
|
865
866
|
raise RuntimeError(f"Raw video path not found: {raw_video_path}")
|
|
866
|
-
|
|
867
|
-
#
|
|
867
|
+
|
|
868
|
+
# Get processor name safely
|
|
868
869
|
video = self._require_current_video()
|
|
869
|
-
|
|
870
|
+
video_meta = getattr(video, "video_meta", None)
|
|
871
|
+
processor = getattr(video_meta, "processor", None) if video_meta else None
|
|
872
|
+
device_name = processor.name if processor else self.processing_context['processor_name']
|
|
873
|
+
|
|
874
|
+
# Create temporary output path for cleaned video
|
|
875
|
+
video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
|
|
870
876
|
cleaned_filename = f"cleaned_{video_filename}"
|
|
871
877
|
cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
# Processor roi can be used later to OCR preknown regions.
|
|
876
|
-
|
|
878
|
+
|
|
879
|
+
# Processor roi is used later to OCR preknown regions.
|
|
880
|
+
|
|
877
881
|
# Clean video with ROI masking (heavy I/O operation)
|
|
878
882
|
actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
|
|
879
883
|
video_path=Path(raw_video_path),
|
|
@@ -881,39 +885,18 @@ class VideoImportService:
|
|
|
881
885
|
endoscope_image_roi=endoscope_image_roi,
|
|
882
886
|
endoscope_data_roi_nested=endoscope_data_roi_nested,
|
|
883
887
|
output_path=cleaned_video_path,
|
|
884
|
-
technique="mask_overlay"
|
|
888
|
+
technique="mask_overlay"
|
|
885
889
|
)
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
try:
|
|
889
|
-
# Prefer frames belonging to this video (UUID in path), else pick any frame
|
|
890
|
-
frame_candidates = list(RAW_FRAME_DIR.rglob("*.jpg")) + list(RAW_FRAME_DIR.rglob("*.png"))
|
|
891
|
-
video_uuid = str(video.uuid)
|
|
892
|
-
filtered = [p for p in frame_candidates if video_uuid in str(p)] or frame_candidates
|
|
893
|
-
if filtered:
|
|
894
|
-
sample_frame = random.choice(filtered)
|
|
895
|
-
ocr_text = trocr_full_image_ocr(sample_frame)
|
|
896
|
-
if ocr_text:
|
|
897
|
-
llm_metadata = frame_cleaner.extract_metadata(ocr_text)
|
|
898
|
-
if llm_metadata:
|
|
899
|
-
# Merge with already extracted frame-level metadata
|
|
900
|
-
extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(extracted_metadata or {}, llm_metadata)
|
|
901
|
-
self.logger.info("LLM metadata extraction (random frame) successful")
|
|
902
|
-
else:
|
|
903
|
-
self.logger.info("LLM metadata extraction (random frame) found no data")
|
|
904
|
-
else:
|
|
905
|
-
self.logger.info("No text extracted by TrOCR on random frame")
|
|
906
|
-
except Exception as e:
|
|
907
|
-
self.logger.error(f"LLM metadata enrichment step failed: {e}")
|
|
908
|
-
|
|
890
|
+
|
|
891
|
+
|
|
909
892
|
# Store cleaned video path for later use in _cleanup_and_archive
|
|
910
|
-
self.processing_context[
|
|
911
|
-
self.processing_context[
|
|
912
|
-
|
|
893
|
+
self.processing_context['cleaned_video_path'] = actual_cleaned_path
|
|
894
|
+
self.processing_context['extracted_metadata'] = extracted_metadata
|
|
895
|
+
|
|
913
896
|
# Update sensitive metadata with extracted information
|
|
914
897
|
self._update_sensitive_metadata(extracted_metadata)
|
|
915
898
|
self.logger.info(f"Extracted metadata from frame cleaning: {extracted_metadata}")
|
|
916
|
-
|
|
899
|
+
|
|
917
900
|
self.logger.info(f"Frame cleaning with ROI masking completed: {actual_cleaned_path}")
|
|
918
901
|
self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
|
|
919
902
|
|
|
@@ -931,13 +914,13 @@ class VideoImportService:
|
|
|
931
914
|
|
|
932
915
|
sm = sensitive_meta
|
|
933
916
|
updated_fields = []
|
|
934
|
-
|
|
917
|
+
|
|
935
918
|
try:
|
|
936
919
|
sm.update_from_dict(extracted_metadata)
|
|
937
920
|
updated_fields = list(extracted_metadata.keys())
|
|
938
921
|
except KeyError as e:
|
|
939
922
|
self.logger.warning(f"Failed to update SensitiveMeta field {e}")
|
|
940
|
-
|
|
923
|
+
|
|
941
924
|
if updated_fields:
|
|
942
925
|
sm.save(update_fields=updated_fields)
|
|
943
926
|
self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
|
|
@@ -961,18 +944,22 @@ class VideoImportService:
|
|
|
961
944
|
except (ValueError, OSError):
|
|
962
945
|
raw_exists = False
|
|
963
946
|
|
|
964
|
-
video_processing_complete =
|
|
947
|
+
video_processing_complete = (
|
|
948
|
+
video.sensitive_meta is not None and
|
|
949
|
+
video.video_meta is not None and
|
|
950
|
+
raw_exists
|
|
951
|
+
)
|
|
965
952
|
|
|
966
953
|
if video_processing_complete:
|
|
967
954
|
self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
|
|
968
955
|
|
|
969
956
|
# Update completion flags if they exist
|
|
970
957
|
completion_fields = []
|
|
971
|
-
for field_name in [
|
|
958
|
+
for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
|
|
972
959
|
if hasattr(video, field_name):
|
|
973
960
|
setattr(video, field_name, True)
|
|
974
961
|
completion_fields.append(field_name)
|
|
975
|
-
|
|
962
|
+
|
|
976
963
|
if completion_fields:
|
|
977
964
|
video.save(update_fields=completion_fields)
|
|
978
965
|
self.logger.info("Updated completion flags: %s", completion_fields)
|
|
@@ -981,15 +968,15 @@ class VideoImportService:
|
|
|
981
968
|
"Video %s processing incomplete - missing required components",
|
|
982
969
|
video.uuid,
|
|
983
970
|
)
|
|
984
|
-
|
|
971
|
+
|
|
985
972
|
except Exception as e:
|
|
986
973
|
self.logger.warning(f"Failed to signal completion status: {e}")
|
|
987
974
|
|
|
988
975
|
def _cleanup_on_error(self):
|
|
989
976
|
"""Cleanup processing context on error."""
|
|
990
|
-
if self.current_video and hasattr(self.current_video,
|
|
977
|
+
if self.current_video and hasattr(self.current_video, 'state'):
|
|
991
978
|
try:
|
|
992
|
-
if self.processing_context.get(
|
|
979
|
+
if self.processing_context.get('processing_started'):
|
|
993
980
|
self.current_video.state.frames_extracted = False
|
|
994
981
|
self.current_video.state.frames_initialized = False
|
|
995
982
|
self.current_video.state.video_meta_extracted = False
|
|
@@ -1001,28 +988,29 @@ class VideoImportService:
|
|
|
1001
988
|
def _cleanup_processing_context(self):
|
|
1002
989
|
"""
|
|
1003
990
|
Cleanup processing context and release file lock.
|
|
1004
|
-
|
|
991
|
+
|
|
1005
992
|
This method is always called in the finally block of import_and_anonymize()
|
|
1006
993
|
to ensure the file lock is released even if processing fails.
|
|
1007
994
|
"""
|
|
1008
995
|
try:
|
|
1009
996
|
# Release file lock if it was acquired
|
|
1010
|
-
lock_context = self.processing_context.get(
|
|
997
|
+
lock_context = self.processing_context.get('_lock_context')
|
|
1011
998
|
if lock_context is not None:
|
|
1012
999
|
try:
|
|
1013
1000
|
lock_context.__exit__(None, None, None)
|
|
1014
1001
|
self.logger.info("Released file lock")
|
|
1015
1002
|
except Exception as e:
|
|
1016
1003
|
self.logger.warning(f"Error releasing file lock: {e}")
|
|
1017
|
-
|
|
1004
|
+
|
|
1018
1005
|
# Remove file from processed set if processing failed
|
|
1019
|
-
file_path = self.processing_context.get(
|
|
1020
|
-
if file_path and not self.processing_context.get(
|
|
1006
|
+
file_path = self.processing_context.get('file_path')
|
|
1007
|
+
if file_path and not self.processing_context.get('anonymization_completed'):
|
|
1021
1008
|
file_path_str = str(file_path)
|
|
1022
1009
|
if file_path_str in self.processed_files:
|
|
1023
1010
|
self.processed_files.remove(file_path_str)
|
|
1024
1011
|
self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
|
|
1025
|
-
|
|
1012
|
+
|
|
1013
|
+
|
|
1026
1014
|
except Exception as e:
|
|
1027
1015
|
self.logger.warning(f"Error during context cleanup: {e}")
|
|
1028
1016
|
finally:
|
|
@@ -1030,7 +1018,6 @@ class VideoImportService:
|
|
|
1030
1018
|
self.current_video = None
|
|
1031
1019
|
self.processing_context = {}
|
|
1032
1020
|
|
|
1033
|
-
|
|
1034
1021
|
# Convenience function for callers/tests that expect a module-level import_and_anonymize
|
|
1035
1022
|
def import_and_anonymize(
|
|
1036
1023
|
file_path,
|
|
@@ -1049,4 +1036,4 @@ def import_and_anonymize(
|
|
|
1049
1036
|
processor_name=processor_name,
|
|
1050
1037
|
save_video=save_video,
|
|
1051
1038
|
delete_source=delete_source,
|
|
1052
|
-
)
|
|
1039
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: endoreg-db
|
|
3
|
-
Version: 0.8.2.8
|
|
3
|
+
Version: 0.8.3.0
|
|
4
4
|
Summary: EndoReg Db Django App
|
|
5
5
|
Project-URL: Homepage, https://info.coloreg.de
|
|
6
6
|
Project-URL: Repository, https://github.com/wg-lux/endoreg-db
|
|
@@ -29,10 +29,11 @@ Requires-Dist: dotenv>=0.9.9
|
|
|
29
29
|
Requires-Dist: faker>=37.6.0
|
|
30
30
|
Requires-Dist: flake8>=7.3.0
|
|
31
31
|
Requires-Dist: gunicorn>=23.0.0
|
|
32
|
+
Requires-Dist: huggingface-hub>=0.35.3
|
|
32
33
|
Requires-Dist: icecream>=2.1.4
|
|
33
34
|
Requires-Dist: librosa==0.11.0
|
|
34
35
|
Requires-Dist: llvmlite>=0.44.0
|
|
35
|
-
Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.
|
|
36
|
+
Requires-Dist: lx-anonymizer[llm,ocr]>=0.8.7
|
|
36
37
|
Requires-Dist: moviepy==2.2.1
|
|
37
38
|
Requires-Dist: mypy>=1.16.0
|
|
38
39
|
Requires-Dist: numpy>=2.2.3
|
|
@@ -384,7 +384,7 @@ endoreg_db/models/media/pdf/report_reader/report_reader_config.py,sha256=wYVDmPS
|
|
|
384
384
|
endoreg_db/models/media/pdf/report_reader/report_reader_flag.py,sha256=j9tjbLRenxpWfeaseALl8rV2Dqem9YaM_duS1iJkARU,536
|
|
385
385
|
endoreg_db/models/media/video/__init__.py,sha256=ifW4SXXN2q6wAuFwSP7XlYskpX7UX6uy0py5mpCCOCM,211
|
|
386
386
|
endoreg_db/models/media/video/create_from_file.py,sha256=3n4bbzFteEOFDUuEikP0x-StCKI5R5IhyKC7o3kLZ6Y,15128
|
|
387
|
-
endoreg_db/models/media/video/pipe_1.py,sha256=
|
|
387
|
+
endoreg_db/models/media/video/pipe_1.py,sha256=yUzTi0pkw2ISsOoFpLmNky6S_V-TEWMxXmPLfB7gUpA,9899
|
|
388
388
|
endoreg_db/models/media/video/pipe_2.py,sha256=DnMxW0uOqSsf7-0n9Rlvn7u89U4Jpkv7n6hFpQfUjkQ,4964
|
|
389
389
|
endoreg_db/models/media/video/refactor_plan.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
390
390
|
endoreg_db/models/media/video/video_file.py,sha256=txlxR8d1OBgt3UEkWvLcGSyLarh0jXLw-z0SAV5KOok,26789
|
|
@@ -461,8 +461,8 @@ endoreg_db/models/medical/risk/risk.py,sha256=g5pgAfCfsvH88nbmX3xsASF3OZgNA-G6NJ
|
|
|
461
461
|
endoreg_db/models/medical/risk/risk_type.py,sha256=kEugcaWSTEWH_Vxq4dcF80Iv1L4_Kk1JKJGQMgz_s0o,1350
|
|
462
462
|
endoreg_db/models/metadata/__init__.py,sha256=8I6oLj3YTmeaPGJpL0AWG5gLwp38QzrEggxSkTisv7c,474
|
|
463
463
|
endoreg_db/models/metadata/frame_ocr_result.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
464
|
-
endoreg_db/models/metadata/model_meta.py,sha256=
|
|
465
|
-
endoreg_db/models/metadata/model_meta_logic.py,sha256=
|
|
464
|
+
endoreg_db/models/metadata/model_meta.py,sha256=F_r-PTLeNi4J-4EaGCQkGIguhdl7Bwba7_i56ZAjc-4,7589
|
|
465
|
+
endoreg_db/models/metadata/model_meta_logic.py,sha256=27mqScxUTJXNUVc6CqAs5dXjspEsh0TWPmlxdJVulGc,12015
|
|
466
466
|
endoreg_db/models/metadata/pdf_meta.py,sha256=BTmpSgqxmPKi0apcNjyrZAS4AFKCPXVdBd6VBeyyv6E,3174
|
|
467
467
|
endoreg_db/models/metadata/sensitive_meta.py,sha256=ekLHrW-b5uYcjfkRd0EW5ncx5ef8Bu-K6msDkpWCAbk,13034
|
|
468
468
|
endoreg_db/models/metadata/sensitive_meta_logic.py,sha256=Oh7ssZQEPfKGfRMF5nXKJpOIxXx-Xibd3rpOu-bQilk,29988
|
|
@@ -600,7 +600,7 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
|
|
|
600
600
|
endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
|
|
601
601
|
endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
|
|
602
602
|
endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
|
|
603
|
-
endoreg_db/services/video_import.py,sha256=
|
|
603
|
+
endoreg_db/services/video_import.py,sha256=PhcOgxU5M4uSEklBXEWHpIaNX-yIYv1rJy-T-fCU8cs,47830
|
|
604
604
|
endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
|
|
605
605
|
endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
|
|
606
606
|
endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
|
|
@@ -784,7 +784,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
|
|
|
784
784
|
endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
|
|
785
785
|
endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
|
|
786
786
|
endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
|
|
787
|
-
endoreg_db-0.8.
|
|
788
|
-
endoreg_db-0.8.
|
|
789
|
-
endoreg_db-0.8.
|
|
790
|
-
endoreg_db-0.8.
|
|
787
|
+
endoreg_db-0.8.3.0.dist-info/METADATA,sha256=q7jvhqzrBQmwSOuzXARYftJxbQ5vBUL_zmJG9U338dA,14758
|
|
788
|
+
endoreg_db-0.8.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
789
|
+
endoreg_db-0.8.3.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
790
|
+
endoreg_db-0.8.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|