endoreg-db 0.8.2.7__py3-none-any.whl → 0.8.2.8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

@@ -8,24 +8,26 @@ Changelog:
8
8
  October 14, 2025: Added file locking mechanism to prevent race conditions
9
9
  during concurrent video imports (matches PDF import pattern)
10
10
  """
11
- from datetime import date
11
+
12
12
  import logging
13
- import sys
14
13
  import os
14
+ import random
15
15
  import shutil
16
+ import sys
16
17
  import time
17
18
  from contextlib import contextmanager
19
+ from datetime import date
18
20
  from pathlib import Path
19
- from typing import Union, Dict, Any, Optional, List, Tuple
21
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
22
+
20
23
  from django.db import transaction
21
- from endoreg_db.models import VideoFile, SensitiveMeta
22
- from endoreg_db.utils.paths import STORAGE_DIR, RAW_FRAME_DIR, VIDEO_DIR, ANONYM_VIDEO_DIR
23
- import random
24
+ from django.db.models.fields.files import FieldFile
24
25
  from lx_anonymizer.ocr import trocr_full_image_ocr
26
+
27
+ from endoreg_db.models import SensitiveMeta, VideoFile
28
+ from endoreg_db.models.media.video.video_file_anonymize import _anonymize, _cleanup_raw_assets
25
29
  from endoreg_db.utils.hashs import get_video_hash
26
- from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets, _anonymize
27
- from typing import TYPE_CHECKING
28
- from django.db.models.fields.files import FieldFile
30
+ from endoreg_db.utils.paths import ANONYM_VIDEO_DIR, RAW_FRAME_DIR, STORAGE_DIR, VIDEO_DIR
29
31
 
30
32
  if TYPE_CHECKING:
31
33
  from endoreg_db.models import EndoscopyProcessor
@@ -37,37 +39,36 @@ MAX_LOCK_WAIT_SECONDS = 90 # New: wait up to 90s for a non-stale lock to clear
37
39
  logger = logging.getLogger(__name__)
38
40
 
39
41
 
40
- class VideoImportService():
42
+ class VideoImportService:
41
43
  """
42
44
  Service for importing and anonymizing video files.
43
45
  Uses a central video instance pattern for cleaner state management.
44
-
46
+
45
47
  Features (October 14, 2025):
46
48
  - File locking to prevent concurrent processing of the same video
47
49
  - Stale lock detection and reclamation (600s timeout)
48
50
  - Hash-based duplicate detection
49
51
  - Graceful fallback processing without lx_anonymizer
50
52
  """
51
-
53
+
52
54
  def __init__(self, project_root: Optional[Path] = None):
53
-
54
55
  # Set up project root path
55
56
  if project_root:
56
57
  self.project_root = Path(project_root)
57
58
  else:
58
59
  self.project_root = Path(__file__).parent.parent.parent.parent
59
-
60
+
60
61
  # Track processed files to prevent duplicates
61
62
  self.processed_files = set(str(file) for file in os.listdir(ANONYM_VIDEO_DIR))
62
-
63
+
63
64
  self.STORAGE_DIR = STORAGE_DIR
64
-
65
+
65
66
  # Central video instance and processing context
66
67
  self.current_video: Optional[VideoFile] = None
67
68
  self.processing_context: Dict[str, Any] = {}
68
-
69
+
69
70
  self.delete_source = False
70
-
71
+
71
72
  self.logger = logging.getLogger(__name__)
72
73
 
73
74
  def _require_current_video(self) -> VideoFile:
@@ -75,12 +76,12 @@ class VideoImportService():
75
76
  if self.current_video is None:
76
77
  raise RuntimeError("Current video instance is not set")
77
78
  return self.current_video
78
-
79
+
79
80
  @contextmanager
80
81
  def _file_lock(self, path: Path):
81
82
  """
82
83
  Create a file lock to prevent duplicate processing of the same video.
83
-
84
+
84
85
  This context manager creates a .lock file alongside the video file.
85
86
  If the lock file already exists, it checks if it's stale (older than
86
87
  STALE_LOCK_SECONDS) and reclaims it if necessary. If it's not stale,
@@ -104,24 +105,21 @@ class VideoImportService():
104
105
  except FileNotFoundError:
105
106
  # Race: lock removed between exists and stat; retry acquire in next loop
106
107
  age = None
107
-
108
+
108
109
  if age is not None and age > STALE_LOCK_SECONDS:
109
110
  try:
110
- logger.warning(
111
- "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
112
- path, age
113
- )
111
+ logger.warning("Stale lock detected for %s (age %.0fs). Reclaiming lock...", path, age)
114
112
  lock_path.unlink()
115
113
  except Exception as e:
116
114
  logger.warning("Failed to remove stale lock %s: %s", lock_path, e)
117
115
  # Loop continues and retries acquire immediately
118
116
  continue
119
-
117
+
120
118
  # Not stale: wait until deadline, then give up gracefully
121
119
  if time.time() >= deadline:
122
120
  raise ValueError(f"File already being processed: {path}")
123
121
  time.sleep(1.0)
124
-
122
+
125
123
  os.write(fd, b"lock")
126
124
  os.close(fd)
127
125
  fd = None
@@ -134,11 +132,11 @@ class VideoImportService():
134
132
  lock_path.unlink()
135
133
  except OSError:
136
134
  pass
137
-
135
+
138
136
  def processed(self) -> bool:
139
137
  """Indicates if the current file has already been processed."""
140
- return getattr(self, '_processed', False)
141
-
138
+ return getattr(self, "_processed", False)
139
+
142
140
  def import_and_anonymize(
143
141
  self,
144
142
  file_path: Union[Path, str],
@@ -153,9 +151,8 @@ class VideoImportService():
153
151
  """
154
152
  try:
155
153
  # Initialize processing context
156
- self._initialize_processing_context(file_path, center_name, processor_name,
157
- save_video, delete_source)
158
-
154
+ self._initialize_processing_context(file_path, center_name, processor_name, save_video, delete_source)
155
+
159
156
  # Validate and prepare file (may raise ValueError if another worker holds a non-stale lock)
160
157
  try:
161
158
  self._validate_and_prepare_file()
@@ -165,27 +162,24 @@ class VideoImportService():
165
162
  self.logger.info(f"Skipping {file_path}: {ve}")
166
163
  return None
167
164
  raise
168
-
169
- # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
170
- self._create_sensitive_file()
171
-
172
- # Create or retrieve video instance
165
+
166
+ # Create or retrieve video instance FIRST
173
167
  self._create_or_retrieve_video_instance()
174
-
168
+
175
169
  # Setup processing environment
176
170
  self._setup_processing_environment()
177
-
171
+
178
172
  # Process frames and metadata
179
173
  self._process_frames_and_metadata()
180
-
174
+
181
175
  # Finalize processing
182
176
  self._finalize_processing()
183
-
177
+
184
178
  # Move files and cleanup
185
179
  self._cleanup_and_archive()
186
-
180
+
187
181
  return self.current_video
188
-
182
+
189
183
  except Exception as e:
190
184
  self.logger.error(f"Video import and anonymization failed for {file_path}: {e}")
191
185
  self._cleanup_on_error()
@@ -193,94 +187,93 @@ class VideoImportService():
193
187
  finally:
194
188
  self._cleanup_processing_context()
195
189
 
196
- def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str,
197
- processor_name: str, save_video: bool, delete_source: bool):
190
+ def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str, processor_name: str, save_video: bool, delete_source: bool):
198
191
  """Initialize the processing context for the current video import."""
199
192
  self.processing_context = {
200
- 'file_path': Path(file_path),
201
- 'center_name': center_name,
202
- 'processor_name': processor_name,
203
- 'save_video': save_video,
204
- 'delete_source': delete_source,
205
- 'processing_started': False,
206
- 'frames_extracted': False,
207
- 'anonymization_completed': False,
208
- 'error_reason': None
193
+ "file_path": Path(file_path),
194
+ "center_name": center_name,
195
+ "processor_name": processor_name,
196
+ "save_video": save_video,
197
+ "delete_source": delete_source,
198
+ "processing_started": False,
199
+ "frames_extracted": False,
200
+ "anonymization_completed": False,
201
+ "error_reason": None,
209
202
  }
210
-
203
+
211
204
  self.logger.info(f"Initialized processing context for: {file_path}")
212
205
 
213
206
  def _validate_and_prepare_file(self):
214
207
  """
215
208
  Validate the video file and prepare for processing.
216
-
209
+
217
210
  Uses file locking to prevent concurrent processing of the same video file.
218
211
  This prevents race conditions where multiple workers might try to process
219
212
  the same video simultaneously.
220
-
213
+
221
214
  The lock is acquired here and held for the entire import process.
222
215
  See _file_lock() for lock reclamation logic.
223
216
  """
224
- file_path = self.processing_context['file_path']
225
-
217
+ file_path = self.processing_context["file_path"]
218
+
226
219
  # Acquire file lock to prevent concurrent processing
227
220
  # Lock will be held until finally block in import_and_anonymize()
228
- self.processing_context['_lock_context'] = self._file_lock(file_path)
229
- self.processing_context['_lock_context'].__enter__()
230
-
221
+ self.processing_context["_lock_context"] = self._file_lock(file_path)
222
+ self.processing_context["_lock_context"].__enter__()
223
+
231
224
  self.logger.info("Acquired file lock for: %s", file_path)
232
-
225
+
233
226
  # Check if already processed (memory-based check)
234
227
  if str(file_path) in self.processed_files:
235
228
  self.logger.info("File %s already processed, skipping", file_path)
236
229
  self._processed = True
237
230
  raise ValueError(f"File already processed: {file_path}")
238
-
231
+
239
232
  # Check file exists
240
233
  if not file_path.exists():
241
234
  raise FileNotFoundError(f"Video file not found: {file_path}")
242
-
235
+
243
236
  self.logger.info("File validation completed for: %s", file_path)
244
237
 
245
238
  def _create_or_retrieve_video_instance(self):
246
239
  """Create or retrieve the VideoFile instance and move to final storage."""
247
240
  # Removed duplicate import of VideoFile (already imported at module level)
248
-
241
+
249
242
  self.logger.info("Creating VideoFile instance...")
250
-
243
+
251
244
  self.current_video = VideoFile.create_from_file_initialized(
252
- file_path=self.processing_context['file_path'],
253
- center_name=self.processing_context['center_name'],
254
- processor_name=self.processing_context['processor_name'],
255
- delete_source=self.processing_context['delete_source'],
256
- save_video_file=self.processing_context['save_video'],
245
+ file_path=self.processing_context["file_path"],
246
+ center_name=self.processing_context["center_name"],
247
+ processor_name=self.processing_context["processor_name"],
248
+ delete_source=self.processing_context["delete_source"],
249
+ save_video_file=self.processing_context["save_video"],
257
250
  )
258
-
251
+
259
252
  if not self.current_video:
260
253
  raise RuntimeError("Failed to create VideoFile instance")
261
-
254
+
262
255
  # Immediately move to final storage locations
263
256
  self._move_to_final_storage()
264
-
257
+
265
258
  self.logger.info("Created VideoFile with UUID: %s", self.current_video.uuid)
266
-
259
+
267
260
  # Get and mark processing state
268
261
  state = VideoFile.get_or_create_state(self.current_video)
269
262
  if not state:
270
263
  raise RuntimeError("Failed to create VideoFile state")
271
-
264
+
272
265
  state.mark_processing_started(save=True)
273
- self.processing_context['processing_started'] = True
266
+ self.processing_context["processing_started"] = True
274
267
 
275
268
  def _move_to_final_storage(self):
276
269
  """
277
270
  Move video from raw_videos to final storage locations.
278
- - Raw video → /data/videos (raw_file_path)
271
+ - Raw video → /data/videos (raw_file_path)
279
272
  - Processed video will later → /data/anonym_videos (file_path)
280
273
  """
281
274
  from endoreg_db.utils import data_paths
282
-
283
- source_path = self.processing_context['file_path']
275
+
276
+ source_path = self.processing_context["file_path"]
284
277
 
285
278
  videos_dir = data_paths["video"]
286
279
  videos_dir.mkdir(parents=True, exist_ok=True)
@@ -320,7 +313,7 @@ class VideoImportService():
320
313
  filename = f"{uuid_str}{source_suffix}" if uuid_str else Path(source_path).name
321
314
  stored_raw_path = videos_dir / filename
322
315
 
323
- delete_source = bool(self.processing_context.get('delete_source'))
316
+ delete_source = bool(self.processing_context.get("delete_source"))
324
317
  stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
325
318
 
326
319
  if not stored_raw_path.exists():
@@ -352,19 +345,19 @@ class VideoImportService():
352
345
  relative_path = Path(stored_raw_path).relative_to(storage_root)
353
346
  if _current_video.raw_file.name != str(relative_path):
354
347
  _current_video.raw_file.name = str(relative_path)
355
- _current_video.save(update_fields=['raw_file'])
348
+ _current_video.save(update_fields=["raw_file"])
356
349
  self.logger.info("Updated raw_file path to: %s", relative_path)
357
350
  except Exception as e:
358
351
  self.logger.error("Failed to ensure raw_file path is relative: %s", e)
359
352
  fallback_relative = Path("videos") / Path(stored_raw_path).name
360
353
  if _current_video.raw_file.name != fallback_relative.as_posix():
361
354
  _current_video.raw_file.name = fallback_relative.as_posix()
362
- _current_video.save(update_fields=['raw_file'])
355
+ _current_video.save(update_fields=["raw_file"])
363
356
  self.logger.info("Updated raw_file path using fallback: %s", fallback_relative.as_posix())
364
357
 
365
358
  # Store paths for later processing
366
- self.processing_context['raw_video_path'] = Path(stored_raw_path)
367
- self.processing_context['video_filename'] = Path(stored_raw_path).name
359
+ self.processing_context["raw_video_path"] = Path(stored_raw_path)
360
+ self.processing_context["video_filename"] = Path(stored_raw_path).name
368
361
 
369
362
  def _setup_processing_environment(self):
370
363
  """Setup the processing environment without file movement."""
@@ -375,32 +368,32 @@ class VideoImportService():
375
368
 
376
369
  # Initialize frame objects in database
377
370
  video.initialize_frames()
378
-
371
+
379
372
  # Extract frames BEFORE processing to prevent pipeline 1 conflicts
380
373
  self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
381
374
  try:
382
375
  frames_extracted = video.extract_frames(overwrite=False)
383
376
  if frames_extracted:
384
- self.processing_context['frames_extracted'] = True
377
+ self.processing_context["frames_extracted"] = True
385
378
  self.logger.info("Frame extraction completed successfully")
386
-
379
+
387
380
  # CRITICAL: Immediately save the frames_extracted state to database
388
381
  # to prevent refresh_from_db() in pipeline 1 from overriding it
389
382
  state = video.get_or_create_state()
390
383
  if not state.frames_extracted:
391
384
  state.frames_extracted = True
392
- state.save(update_fields=['frames_extracted'])
385
+ state.save(update_fields=["frames_extracted"])
393
386
  self.logger.info("Persisted frames_extracted=True to database")
394
387
  else:
395
388
  self.logger.warning("Frame extraction failed, but continuing...")
396
- self.processing_context['frames_extracted'] = False
389
+ self.processing_context["frames_extracted"] = False
397
390
  except Exception as e:
398
391
  self.logger.warning(f"Frame extraction failed during setup: {e}, but continuing...")
399
- self.processing_context['frames_extracted'] = False
400
-
392
+ self.processing_context["frames_extracted"] = False
393
+
401
394
  # Ensure default patient data
402
395
  self._ensure_default_patient_data(video_instance=video)
403
-
396
+
404
397
  self.logger.info("Processing environment setup completed")
405
398
 
406
399
  def _process_frames_and_metadata(self):
@@ -419,25 +412,24 @@ class VideoImportService():
419
412
 
420
413
  try:
421
414
  self.logger.info("Starting frame-level anonymization with processor ROI masking...")
422
-
415
+
423
416
  # Get processor ROI information
424
417
  endoscope_data_roi_nested, endoscope_image_roi = self._get_processor_roi_info()
425
-
418
+
426
419
  # Perform frame cleaning with timeout to prevent blocking
427
- from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
428
-
420
+ from concurrent.futures import ThreadPoolExecutor
421
+ from concurrent.futures import TimeoutError as FutureTimeoutError
422
+
429
423
  with ThreadPoolExecutor(max_workers=1) as executor:
430
424
  future = executor.submit(self._perform_frame_cleaning, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi)
431
425
  try:
432
426
  # Increased timeout to better accommodate ffmpeg + OCR
433
427
  future.result(timeout=300)
434
- self.processing_context['anonymization_completed'] = True
428
+ self.processing_context["anonymization_completed"] = True
435
429
  self.logger.info("Frame cleaning completed successfully within timeout")
436
430
  except FutureTimeoutError:
437
431
  self.logger.warning("Frame cleaning timed out; entering grace period check for cleaned output")
438
432
  # Grace period: detect if cleaned file appears shortly after timeout
439
- raw_video_path = self.processing_context.get('raw_video_path')
440
- video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name if raw_video_path else "video.mp4")
441
433
  grace_seconds = 60
442
434
  expected_cleaned_path: Optional[Path] = None
443
435
  processed_field = video.processed_file
@@ -450,8 +442,8 @@ class VideoImportService():
450
442
  if expected_cleaned_path is not None:
451
443
  for _ in range(grace_seconds):
452
444
  if expected_cleaned_path.exists():
453
- self.processing_context['cleaned_video_path'] = expected_cleaned_path
454
- self.processing_context['anonymization_completed'] = True
445
+ self.processing_context["cleaned_video_path"] = expected_cleaned_path
446
+ self.processing_context["anonymization_completed"] = True
455
447
  self.logger.info("Detected cleaned video during grace period: %s", expected_cleaned_path)
456
448
  found = True
457
449
  break
@@ -469,8 +461,8 @@ class VideoImportService():
469
461
  except Exception as fallback_error:
470
462
  self.logger.error("Fallback anonymization also failed: %s", fallback_error)
471
463
  # If even fallback fails, mark as not anonymized but continue import
472
- self.processing_context['anonymization_completed'] = False
473
- self.processing_context['error_reason'] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
464
+ self.processing_context["anonymization_completed"] = False
465
+ self.processing_context["error_reason"] = f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
474
466
 
475
467
  def _save_anonymized_video(self):
476
468
  video = self._require_current_video()
@@ -481,9 +473,7 @@ class VideoImportService():
481
473
 
482
474
  new_processed_hash = get_video_hash(anonymized_video_path)
483
475
  if video.__class__.objects.filter(processed_video_hash=new_processed_hash).exclude(pk=video.pk).exists():
484
- raise ValueError(
485
- f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
486
- )
476
+ raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid}).")
487
477
 
488
478
  video.processed_video_hash = new_processed_hash
489
479
  video.processed_file.name = anonymized_video_path.relative_to(STORAGE_DIR).as_posix()
@@ -502,11 +492,11 @@ class VideoImportService():
502
492
 
503
493
  update_fields.extend(["raw_file", "video_hash"])
504
494
 
505
- transaction.on_commit(lambda: _cleanup_raw_assets(
506
- video_uuid=video.uuid,
507
- raw_file_path=original_raw_file_path_to_delete,
508
- raw_frame_dir=original_raw_frame_dir_to_delete
509
- ))
495
+ transaction.on_commit(
496
+ lambda: _cleanup_raw_assets(
497
+ video_uuid=video.uuid, raw_file_path=original_raw_file_path_to_delete, raw_frame_dir=original_raw_frame_dir_to_delete
498
+ )
499
+ )
510
500
 
511
501
  video.save(update_fields=update_fields)
512
502
  video.state.mark_anonymized(save=True)
@@ -525,32 +515,33 @@ class VideoImportService():
525
515
  self.logger.warning("No VideoFile instance available for fallback anonymization")
526
516
  else:
527
517
  # Try VideoFile.pipe_2() method if available
528
- if hasattr(video, 'pipe_2'):
518
+ if hasattr(video, "pipe_2"):
529
519
  self.logger.info("Trying VideoFile.pipe_2() method...")
530
520
  if video.pipe_2():
531
521
  self.logger.info("VideoFile.pipe_2() succeeded")
532
- self.processing_context['anonymization_completed'] = True
522
+ self.processing_context["anonymization_completed"] = True
533
523
  return
534
524
  self.logger.warning("VideoFile.pipe_2() returned False")
535
525
  # Try direct anonymization via _anonymize
536
526
  if _anonymize(video, delete_original_raw=self.delete_source):
537
527
  self.logger.info("VideoFile._anonymize() succeeded")
538
- self.processing_context['anonymization_completed'] = True
528
+ self.processing_context["anonymization_completed"] = True
539
529
  return
540
530
 
541
531
  # Strategy 2: Simple copy (no processing, just copy raw to processed)
542
532
  self.logger.info("Using simple copy fallback (raw video will be used as 'processed' video)")
543
- self.processing_context['anonymization_completed'] = False
544
- self.processing_context['use_raw_as_processed'] = True
533
+ self.processing_context["anonymization_completed"] = False
534
+ self.processing_context["use_raw_as_processed"] = True
545
535
  self.logger.warning("Fallback: Video will be imported without anonymization (raw copy used)")
546
536
  except Exception as e:
547
537
  self.logger.error(f"Error during fallback anonymization: {e}", exc_info=True)
548
- self.processing_context['anonymization_completed'] = False
549
- self.processing_context['error_reason'] = str(e)
538
+ self.processing_context["anonymization_completed"] = False
539
+ self.processing_context["error_reason"] = str(e)
540
+
550
541
  def _finalize_processing(self):
551
542
  """Finalize processing and update video state."""
552
543
  self.logger.info("Updating video processing state...")
553
-
544
+
554
545
  with transaction.atomic():
555
546
  video = self._require_current_video()
556
547
  try:
@@ -559,36 +550,33 @@ class VideoImportService():
559
550
  self.logger.warning("Could not refresh VideoFile %s from DB: %s", video.uuid, refresh_error)
560
551
 
561
552
  state = video.get_or_create_state()
562
-
553
+
563
554
  # Only mark frames as extracted if they were successfully extracted
564
- if self.processing_context.get('frames_extracted', False):
555
+ if self.processing_context.get("frames_extracted", False):
565
556
  state.frames_extracted = True
566
557
  self.logger.info("Marked frames as extracted in state")
567
558
  else:
568
559
  self.logger.warning("Frames were not extracted, not updating state")
569
-
560
+
570
561
  # Always mark these as true (metadata extraction attempts were made)
571
562
  state.frames_initialized = True
572
563
  state.video_meta_extracted = True
573
564
  state.text_meta_extracted = True
574
-
565
+
575
566
  # ✅ FIX: Only mark as processed if anonymization actually completed
576
- anonymization_completed = self.processing_context.get('anonymization_completed', False)
567
+ anonymization_completed = self.processing_context.get("anonymization_completed", False)
577
568
  if anonymization_completed:
578
569
  state.mark_sensitive_meta_processed(save=False)
579
570
  self.logger.info("Anonymization completed - marking sensitive meta as processed")
580
571
  else:
581
- self.logger.warning(
582
- "Anonymization NOT completed - NOT marking as processed. "
583
- f"Reason: {self.processing_context.get('error_reason', 'Unknown')}"
584
- )
572
+ self.logger.warning(f"Anonymization NOT completed - NOT marking as processed. Reason: {self.processing_context.get('error_reason', 'Unknown')}")
585
573
  # Explicitly mark as NOT processed
586
574
  state.sensitive_meta_processed = False
587
-
575
+
588
576
  # Save all state changes
589
577
  state.save()
590
578
  self.logger.info("Video processing state updated")
591
-
579
+
592
580
  # Signal completion
593
581
  self._signal_completion()
594
582
 
@@ -602,12 +590,12 @@ class VideoImportService():
602
590
  video = self._require_current_video()
603
591
 
604
592
  processed_video_path = None
605
- if 'cleaned_video_path' in self.processing_context:
606
- processed_video_path = self.processing_context['cleaned_video_path']
593
+ if "cleaned_video_path" in self.processing_context:
594
+ processed_video_path = self.processing_context["cleaned_video_path"]
607
595
  else:
608
- raw_video_path = self.processing_context.get('raw_video_path')
596
+ raw_video_path = self.processing_context.get("raw_video_path")
609
597
  if raw_video_path and Path(raw_video_path).exists():
610
- video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
598
+ video_filename = self.processing_context.get("video_filename", Path(raw_video_path).name)
611
599
  processed_filename = f"processed_{video_filename}"
612
600
  processed_video_path = Path(raw_video_path).parent / processed_filename
613
601
  try:
@@ -636,13 +624,13 @@ class VideoImportService():
636
624
  except Exception as exc:
637
625
  self.logger.error("Failed to update processed_file path: %s", exc)
638
626
  video.processed_file.name = f"anonym_videos/{anonym_video_filename}"
639
- video.save(update_fields=['processed_file'])
627
+ video.save(update_fields=["processed_file"])
640
628
  self.logger.info(
641
629
  "Updated processed_file path using fallback: %s",
642
630
  f"anonym_videos/{anonym_video_filename}",
643
631
  )
644
632
 
645
- self.processing_context['anonymization_completed'] = True
633
+ self.processing_context["anonymization_completed"] = True
646
634
  else:
647
635
  self.logger.warning("Processed video file not found after move: %s", anonym_target_path)
648
636
  except Exception as exc:
@@ -652,13 +640,14 @@ class VideoImportService():
652
640
 
653
641
  try:
654
642
  from endoreg_db.utils.paths import RAW_FRAME_DIR
643
+
655
644
  shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
656
645
  self.logger.debug("Cleaned up temporary frames directory: %s", RAW_FRAME_DIR)
657
646
  except Exception as exc:
658
647
  self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
659
648
 
660
- source_path = self.processing_context['file_path']
661
- if self.processing_context['delete_source'] and Path(source_path).exists():
649
+ source_path = self.processing_context["file_path"]
650
+ if self.processing_context["delete_source"] and Path(source_path).exists():
662
651
  try:
663
652
  os.remove(source_path)
664
653
  self.logger.info("Removed remaining source file: %s", source_path)
@@ -669,25 +658,25 @@ class VideoImportService():
669
658
  self.logger.warning("No processed_file found after cleanup - video will be unprocessed")
670
659
  try:
671
660
  video.anonymize(delete_original_raw=self.delete_source)
672
- video.save(update_fields=['processed_file'])
661
+ video.save(update_fields=["processed_file"])
673
662
  self.logger.info("Late-stage anonymization succeeded")
674
663
  except Exception as e:
675
664
  self.logger.error("Late-stage anonymization failed: %s", e)
676
- self.processing_context['anonymization_completed'] = False
665
+ self.processing_context["anonymization_completed"] = False
677
666
 
678
667
  self.logger.info("Cleanup and archiving completed")
679
668
 
680
- self.processed_files.add(str(self.processing_context['file_path']))
669
+ self.processed_files.add(str(self.processing_context["file_path"]))
681
670
 
682
671
  with transaction.atomic():
683
672
  video.refresh_from_db()
684
- if hasattr(video, 'state') and self.processing_context.get('anonymization_completed'):
673
+ if hasattr(video, "state") and self.processing_context.get("anonymization_completed"):
685
674
  video.state.mark_sensitive_meta_processed(save=True)
686
675
 
687
676
  self.logger.info("Import and anonymization completed for VideoFile UUID: %s", video.uuid)
688
677
  self.logger.info("Raw video stored in: /data/videos")
689
678
  self.logger.info("Processed video stored in: /data/anonym_videos")
690
-
679
+
691
680
  def _create_sensitive_file(
692
681
  self,
693
682
  video_instance: VideoFile | None = None,
@@ -750,7 +739,7 @@ class VideoImportService():
750
739
  self.logger.info("Created sensitive file for %s at %s", video.uuid, target_file_path)
751
740
  return target_file_path
752
741
 
753
- def _get_processor_roi_info(self) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
742
+ def _get_processor_roi_info(self) -> Tuple[Optional[Any], Optional[Dict[str, Any]]]:
754
743
  """Get processor ROI information for masking."""
755
744
  endoscope_data_roi_nested = None
756
745
  endoscope_image_roi = None
@@ -824,75 +813,67 @@ class VideoImportService():
824
813
  except Exception as exc:
825
814
  self.logger.error("Failed to update SensitiveMeta for video %s: %s", video.uuid, exc)
826
815
 
827
-
828
-
829
816
  def _ensure_frame_cleaning_available(self):
830
817
  """
831
818
  Ensure frame cleaning modules are available by adding lx-anonymizer to path.
832
-
819
+
833
820
  Returns:
834
821
  Tuple of (availability_flag, FrameCleaner_class, ReportReader_class)
835
822
  """
836
823
  try:
837
824
  # Check if we can find the lx-anonymizer directory
838
825
  from importlib import resources
826
+
839
827
  lx_anonymizer_path = resources.files("lx_anonymizer")
840
828
 
841
829
  # make sure lx_anonymizer_path is a Path object
842
830
  lx_anonymizer_path = Path(str(lx_anonymizer_path))
843
-
831
+
844
832
  if lx_anonymizer_path.exists():
845
833
  # Add to Python path temporarily
846
834
  if str(lx_anonymizer_path) not in sys.path:
847
835
  sys.path.insert(0, str(lx_anonymizer_path))
848
-
836
+
849
837
  # Try simple import
850
838
  from lx_anonymizer import FrameCleaner, ReportReader
851
-
839
+
852
840
  self.logger.info("Successfully imported lx_anonymizer modules")
853
-
841
+
854
842
  # Remove from path to avoid conflicts
855
843
  if str(lx_anonymizer_path) in sys.path:
856
844
  sys.path.remove(str(lx_anonymizer_path))
857
-
845
+
858
846
  return True, FrameCleaner, ReportReader
859
-
847
+
860
848
  else:
861
- self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
862
-
849
+ self.logger.warning(f"lx-anonymizer path not found: {lx_anonymizer_path}")
850
+
863
851
  except Exception as e:
864
852
  self.logger.warning(f"Frame cleaning not available: {e}")
865
-
866
- return False, None, None
867
853
 
868
-
854
+ return False, None, None
869
855
 
870
856
  def _perform_frame_cleaning(self, FrameCleaner, endoscope_data_roi_nested, endoscope_image_roi):
871
857
  """Perform frame cleaning and anonymization."""
872
858
  # Instantiate frame cleaner
873
859
  frame_cleaner = FrameCleaner()
874
-
860
+
875
861
  # Prepare parameters for frame cleaning
876
- raw_video_path = self.processing_context.get('raw_video_path')
877
-
862
+ raw_video_path = self.processing_context.get("raw_video_path")
863
+
878
864
  if not raw_video_path or not Path(raw_video_path).exists():
879
865
  raise RuntimeError(f"Raw video path not found: {raw_video_path}")
880
-
881
- # Get processor name safely
882
- video = self._require_current_video()
883
- video_meta = getattr(video, "video_meta", None)
884
- processor = getattr(video_meta, "processor", None) if video_meta else None
885
- device_name = processor.name if processor else self.processing_context['processor_name']
886
-
866
+
887
867
  # Create temporary output path for cleaned video
888
- video_filename = self.processing_context.get('video_filename', Path(raw_video_path).name)
868
+ video = self._require_current_video()
869
+ video_filename = self.processing_context.get("video_filename", Path(raw_video_path).name)
889
870
  cleaned_filename = f"cleaned_{video_filename}"
890
871
  cleaned_video_path = Path(raw_video_path).parent / cleaned_filename
891
-
892
- processor_roi, endoscope_roi = self._get_processor_roi_info(video)
893
-
872
+
873
+ processor_roi, endoscope_roi = self._get_processor_roi_info()
874
+
894
875
  # Processor roi can be used later to OCR preknown regions.
895
-
876
+
896
877
  # Clean video with ROI masking (heavy I/O operation)
897
878
  actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
898
879
  video_path=Path(raw_video_path),
@@ -900,9 +881,9 @@ class VideoImportService():
900
881
  endoscope_image_roi=endoscope_image_roi,
901
882
  endoscope_data_roi_nested=endoscope_data_roi_nested,
902
883
  output_path=cleaned_video_path,
903
- technique="mask_overlay"
884
+ technique="mask_overlay",
904
885
  )
905
-
886
+
906
887
  # Optional: enrich metadata using TrOCR+LLM on one random extracted frame
907
888
  try:
908
889
  # Prefer frames belonging to this video (UUID in path), else pick any frame
@@ -916,9 +897,7 @@ class VideoImportService():
916
897
  llm_metadata = frame_cleaner.extract_metadata(ocr_text)
917
898
  if llm_metadata:
918
899
  # Merge with already extracted frame-level metadata
919
- extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(
920
- extracted_metadata or {}, llm_metadata
921
- )
900
+ extracted_metadata = frame_cleaner.frame_metadata_extractor.merge_metadata(extracted_metadata or {}, llm_metadata)
922
901
  self.logger.info("LLM metadata extraction (random frame) successful")
923
902
  else:
924
903
  self.logger.info("LLM metadata extraction (random frame) found no data")
@@ -926,15 +905,15 @@ class VideoImportService():
926
905
  self.logger.info("No text extracted by TrOCR on random frame")
927
906
  except Exception as e:
928
907
  self.logger.error(f"LLM metadata enrichment step failed: {e}")
929
-
908
+
930
909
  # Store cleaned video path for later use in _cleanup_and_archive
931
- self.processing_context['cleaned_video_path'] = actual_cleaned_path
932
- self.processing_context['extracted_metadata'] = extracted_metadata
933
-
910
+ self.processing_context["cleaned_video_path"] = actual_cleaned_path
911
+ self.processing_context["extracted_metadata"] = extracted_metadata
912
+
934
913
  # Update sensitive metadata with extracted information
935
914
  self._update_sensitive_metadata(extracted_metadata)
936
915
  self.logger.info(f"Extracted metadata from frame cleaning: {extracted_metadata}")
937
-
916
+
938
917
  self.logger.info(f"Frame cleaning with ROI masking completed: {actual_cleaned_path}")
939
918
  self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
940
919
 
@@ -952,13 +931,13 @@ class VideoImportService():
952
931
 
953
932
  sm = sensitive_meta
954
933
  updated_fields = []
955
-
934
+
956
935
  try:
957
936
  sm.update_from_dict(extracted_metadata)
958
937
  updated_fields = list(extracted_metadata.keys())
959
938
  except KeyError as e:
960
939
  self.logger.warning(f"Failed to update SensitiveMeta field {e}")
961
-
940
+
962
941
  if updated_fields:
963
942
  sm.save(update_fields=updated_fields)
964
943
  self.logger.info("Updated SensitiveMeta fields for video %s: %s", video.uuid, updated_fields)
@@ -982,22 +961,18 @@ class VideoImportService():
982
961
  except (ValueError, OSError):
983
962
  raw_exists = False
984
963
 
985
- video_processing_complete = (
986
- video.sensitive_meta is not None and
987
- video.video_meta is not None and
988
- raw_exists
989
- )
964
+ video_processing_complete = video.sensitive_meta is not None and video.video_meta is not None and raw_exists
990
965
 
991
966
  if video_processing_complete:
992
967
  self.logger.info("Video %s processing completed successfully - ready for validation", video.uuid)
993
968
 
994
969
  # Update completion flags if they exist
995
970
  completion_fields = []
996
- for field_name in ['import_completed', 'processing_complete', 'ready_for_validation']:
971
+ for field_name in ["import_completed", "processing_complete", "ready_for_validation"]:
997
972
  if hasattr(video, field_name):
998
973
  setattr(video, field_name, True)
999
974
  completion_fields.append(field_name)
1000
-
975
+
1001
976
  if completion_fields:
1002
977
  video.save(update_fields=completion_fields)
1003
978
  self.logger.info("Updated completion flags: %s", completion_fields)
@@ -1006,15 +981,15 @@ class VideoImportService():
1006
981
  "Video %s processing incomplete - missing required components",
1007
982
  video.uuid,
1008
983
  )
1009
-
984
+
1010
985
  except Exception as e:
1011
986
  self.logger.warning(f"Failed to signal completion status: {e}")
1012
987
 
1013
988
  def _cleanup_on_error(self):
1014
989
  """Cleanup processing context on error."""
1015
- if self.current_video and hasattr(self.current_video, 'state'):
990
+ if self.current_video and hasattr(self.current_video, "state"):
1016
991
  try:
1017
- if self.processing_context.get('processing_started'):
992
+ if self.processing_context.get("processing_started"):
1018
993
  self.current_video.state.frames_extracted = False
1019
994
  self.current_video.state.frames_initialized = False
1020
995
  self.current_video.state.video_meta_extracted = False
@@ -1026,31 +1001,28 @@ class VideoImportService():
1026
1001
  def _cleanup_processing_context(self):
1027
1002
  """
1028
1003
  Cleanup processing context and release file lock.
1029
-
1004
+
1030
1005
  This method is always called in the finally block of import_and_anonymize()
1031
1006
  to ensure the file lock is released even if processing fails.
1032
1007
  """
1033
1008
  try:
1034
1009
  # Release file lock if it was acquired
1035
- lock_context = self.processing_context.get('_lock_context')
1010
+ lock_context = self.processing_context.get("_lock_context")
1036
1011
  if lock_context is not None:
1037
1012
  try:
1038
1013
  lock_context.__exit__(None, None, None)
1039
1014
  self.logger.info("Released file lock")
1040
1015
  except Exception as e:
1041
1016
  self.logger.warning(f"Error releasing file lock: {e}")
1042
-
1017
+
1043
1018
  # Remove file from processed set if processing failed
1044
- file_path = self.processing_context.get('file_path')
1045
- if file_path and not self.processing_context.get('anonymization_completed'):
1019
+ file_path = self.processing_context.get("file_path")
1020
+ if file_path and not self.processing_context.get("anonymization_completed"):
1046
1021
  file_path_str = str(file_path)
1047
1022
  if file_path_str in self.processed_files:
1048
1023
  self.processed_files.remove(file_path_str)
1049
1024
  self.logger.info(f"Removed {file_path_str} from processed files (failed processing)")
1050
-
1051
-
1052
-
1053
-
1025
+
1054
1026
  except Exception as e:
1055
1027
  self.logger.warning(f"Error during context cleanup: {e}")
1056
1028
  finally:
@@ -1058,6 +1030,7 @@ class VideoImportService():
1058
1030
  self.current_video = None
1059
1031
  self.processing_context = {}
1060
1032
 
1033
+
1061
1034
  # Convenience function for callers/tests that expect a module-level import_and_anonymize
1062
1035
  def import_and_anonymize(
1063
1036
  file_path,
@@ -1076,4 +1049,4 @@ def import_and_anonymize(
1076
1049
  processor_name=processor_name,
1077
1050
  save_video=save_video,
1078
1051
  delete_source=delete_source,
1079
- )
1052
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endoreg-db
3
- Version: 0.8.2.7
3
+ Version: 0.8.2.8
4
4
  Summary: EndoReg Db Django App
5
5
  Project-URL: Homepage, https://info.coloreg.de
6
6
  Project-URL: Repository, https://github.com/wg-lux/endoreg-db
@@ -600,7 +600,7 @@ endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CU
600
600
  endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
601
601
  endoreg_db/services/segment_sync.py,sha256=YgHvIHkbW4mqCu0ACf3zjRSZnNfxWwt4gh5syUVXuE0,6400
602
602
  endoreg_db/services/storage_aware_video_processor.py,sha256=kKFK64vXLeBSVkp1YJonU3gFDTeXZ8C4qb9QZZB99SE,13420
603
- endoreg_db/services/video_import.py,sha256=X20FQkEO5QGcfuacAz8jX1_LW1GhwbF33JGPpmypEyk,50161
603
+ endoreg_db/services/video_import.py,sha256=NhQ9eJRUUNo9-j6c6ru921xt-oBgGMY0KN2zsRpskGI,48239
604
604
  endoreg_db/tasks/upload_tasks.py,sha256=OJq7DhNwcbWdXzHY8jz5c51BCVkPN5gSWOz-6Fx6W5M,7799
605
605
  endoreg_db/tasks/video_ingest.py,sha256=kxFuYkHijINV0VabQKCFVpJRv6eCAw07tviONurDgg8,5265
606
606
  endoreg_db/tasks/video_processing_tasks.py,sha256=KjcERRJ1TZzmavBpvr6OsvSTUViU0PR1ECWnEdzu2Js,14140
@@ -784,7 +784,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
784
784
  endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
785
785
  endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
786
786
  endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
787
- endoreg_db-0.8.2.7.dist-info/METADATA,sha256=BCQcbq7ZExYZF5gBfmkDflNtysGjAt36WUMKmqfA-48,14719
788
- endoreg_db-0.8.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
789
- endoreg_db-0.8.2.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
790
- endoreg_db-0.8.2.7.dist-info/RECORD,,
787
+ endoreg_db-0.8.2.8.dist-info/METADATA,sha256=uKCJOlsyIekOWKARAh7y3eO9Ch5KBe0RmcgRM38o4UE,14719
788
+ endoreg_db-0.8.2.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
789
+ endoreg_db-0.8.2.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
790
+ endoreg_db-0.8.2.8.dist-info/RECORD,,