endoreg-db 0.8.1__py3-none-any.whl → 0.8.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (48) hide show
  1. endoreg_db/helpers/download_segmentation_model.py +31 -0
  2. endoreg_db/migrations/0003_add_center_display_name.py +30 -0
  3. endoreg_db/models/administration/center/center.py +7 -1
  4. endoreg_db/models/media/pdf/raw_pdf.py +31 -26
  5. endoreg_db/models/media/video/create_from_file.py +26 -4
  6. endoreg_db/models/media/video/pipe_1.py +13 -1
  7. endoreg_db/models/media/video/video_file.py +36 -13
  8. endoreg_db/models/media/video/video_file_anonymize.py +2 -1
  9. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +12 -0
  10. endoreg_db/models/media/video/video_file_io.py +4 -2
  11. endoreg_db/models/metadata/video_meta.py +2 -2
  12. endoreg_db/serializers/anonymization.py +3 -0
  13. endoreg_db/services/pdf_import.py +131 -45
  14. endoreg_db/services/video_import.py +427 -128
  15. endoreg_db/urls/__init__.py +0 -2
  16. endoreg_db/urls/media.py +201 -4
  17. endoreg_db/urls/report.py +0 -30
  18. endoreg_db/urls/sensitive_meta.py +0 -36
  19. endoreg_db/urls/video.py +30 -88
  20. endoreg_db/utils/paths.py +2 -10
  21. endoreg_db/utils/video/ffmpeg_wrapper.py +67 -4
  22. endoreg_db/views/anonymization/validate.py +76 -32
  23. endoreg_db/views/media/__init__.py +38 -2
  24. endoreg_db/views/media/pdf_media.py +1 -1
  25. endoreg_db/views/media/segments.py +71 -0
  26. endoreg_db/views/media/sensitive_metadata.py +314 -0
  27. endoreg_db/views/media/video_segments.py +596 -0
  28. endoreg_db/views/pdf/reimport.py +18 -8
  29. endoreg_db/views/video/__init__.py +0 -8
  30. endoreg_db/views/video/correction.py +34 -32
  31. endoreg_db/views/video/reimport.py +15 -12
  32. endoreg_db/views/video/video_stream.py +168 -50
  33. {endoreg_db-0.8.1.dist-info → endoreg_db-0.8.2.1.dist-info}/METADATA +2 -2
  34. {endoreg_db-0.8.1.dist-info → endoreg_db-0.8.2.1.dist-info}/RECORD +47 -43
  35. endoreg_db/views/video/media/__init__.py +0 -23
  36. /endoreg_db/{urls/pdf.py → config/__init__.py} +0 -0
  37. /endoreg_db/views/video/{media/task_status.py → task_status.py} +0 -0
  38. /endoreg_db/views/video/{media/video_analyze.py → video_analyze.py} +0 -0
  39. /endoreg_db/views/video/{media/video_apply_mask.py → video_apply_mask.py} +0 -0
  40. /endoreg_db/views/video/{media/video_correction.py → video_correction.py} +0 -0
  41. /endoreg_db/views/video/{media/video_download_processed.py → video_download_processed.py} +0 -0
  42. /endoreg_db/views/video/{media/video_media.py → video_media.py} +0 -0
  43. /endoreg_db/views/video/{media/video_meta.py → video_meta.py} +0 -0
  44. /endoreg_db/views/video/{media/video_processing_history.py → video_processing_history.py} +0 -0
  45. /endoreg_db/views/video/{media/video_remove_frames.py → video_remove_frames.py} +0 -0
  46. /endoreg_db/views/video/{media/video_reprocess.py → video_reprocess.py} +0 -0
  47. {endoreg_db-0.8.1.dist-info → endoreg_db-0.8.2.1.dist-info}/WHEEL +0 -0
  48. {endoreg_db-0.8.1.dist-info → endoreg_db-0.8.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,31 @@
1
+ import huggingface_hub
2
+ from typing import Optional
3
+
4
def download_segmentation_model(
    repo_id: str = "wg-lux/colo_segmentation_RegNetX800MF_base",
    filename: str = "model.safetensors",
    cache_dir: Optional[str] = None,
) -> str:
    """
    Download a segmentation model file from Hugging Face and return its local path.

    The call is delegated to ``huggingface_hub.hf_hub_download``.

    Args:
        repo_id (str): The Hugging Face repository ID (default: wg-lux model).
        filename (str): The file to fetch from the repository
            (default: model.safetensors).
        cache_dir (Optional[str]): Directory used as download cache. ``None`` is
            passed through unchanged, leaving the choice of cache location to
            ``huggingface_hub``.

    Returns:
        str: The local path reported by ``hf_hub_download``.

    Example:
        >>> model_path = download_segmentation_model()  # doctest: +SKIP
        >>> # Downloads from wg-lux/colo_segmentation_RegNetX800MF_base
    """
    # ``resume_download`` is deliberately not passed: huggingface_hub has
    # deprecated the argument (interrupted downloads are resumed automatically),
    # so supplying it only triggers a FutureWarning and is expected to break
    # once the parameter is removed.
    return huggingface_hub.hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        cache_dir=cache_dir,
        force_download=False,  # never force a fresh download of the file
    )
@@ -0,0 +1,30 @@
1
+ from django.db import migrations, models
2
+
3
+
4
def populate_display_name(apps, schema_editor):
    """
    Forward data step: copy ``name`` into ``display_name`` where it is still empty.

    Args:
        apps: Historical app registry supplied by ``RunPython``; used to look up
            the ``Center`` model as it exists at this point in the migration graph.
        schema_editor: Supplied by ``RunPython``; not used here.
    """
    Center = apps.get_model('endoreg_db', 'Center')
    # One UPDATE statement instead of loading every row and saving it
    # individually. The column is added as a non-null CharField with
    # default '', so "empty" is exactly the empty string.
    Center.objects.filter(display_name='').update(display_name=models.F('name'))
10
+
11
+
12
def reset_display_name(apps, schema_editor):
    """
    Reverse data step: set ``display_name`` to the empty string on all centers.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Supplied by ``RunPython``; not used here.
    """
    center_manager = apps.get_model('endoreg_db', 'Center').objects
    center_manager.update(display_name='')
15
+
16
+
17
class Migration(migrations.Migration):
    """Add the ``display_name`` column to ``Center`` and run its data step."""

    dependencies = [('endoreg_db', '0002_add_video_correction_models')]

    operations = [
        # Schema step: optional text column, empty string by default.
        migrations.AddField(
            model_name='center',
            name='display_name',
            field=models.CharField(max_length=255, default='', blank=True),
        ),
        # Data step: forward fills display_name, reverse clears it again.
        migrations.RunPython(
            code=populate_display_name,
            reverse_code=reset_display_name,
        ),
    ]
@@ -19,6 +19,7 @@ class Center(models.Model):
19
19
 
20
20
  # import_id = models.IntegerField(primary_key=True)
21
21
  name = models.CharField(max_length=255)
22
+ display_name = models.CharField(max_length=255, blank=True, default="")
22
23
 
23
24
  first_names = models.ManyToManyField(
24
25
  to="FirstName",
@@ -45,8 +46,13 @@ class Center(models.Model):
45
46
  def natural_key(self) -> tuple[str]:
46
47
  return (self.name,)
47
48
 
49
def save(self, *args, **kwargs):
    """
    Save the center, defaulting an empty ``display_name`` to ``name``.

    When the caller restricts the write with a non-empty ``update_fields``
    keyword, ``display_name`` is added to it whenever this method fills the
    value in, so the defaulted value is actually written. An empty
    ``update_fields`` is passed on empty, leaving the caller's request
    unchanged.
    """
    if not self.display_name:
        self.display_name = self.name
        update_fields = kwargs.get("update_fields")
        if update_fields is not None:
            # Materialise once: the caller may have passed any iterable.
            fields = set(update_fields)
            if fields:
                fields.add("display_name")
            kwargs["update_fields"] = fields
    super().save(*args, **kwargs)
53
+
48
54
  def __str__(self) -> str:
49
- return str(object=self.name)
55
+ return str(object=self.display_name or self.name)
50
56
 
51
57
  def get_first_names(self):
52
58
  return self.first_names.all()
@@ -383,37 +383,42 @@ class RawPdfFile(models.Model):
383
383
  new_file_name, _uuid = get_uuid_filename(file_path)
384
384
  logger.info(f"Generated new filename: {new_file_name}")
385
385
 
386
- # Create model instance (without file initially)
387
- raw_pdf = cls(
388
- pdf_hash=pdf_hash,
389
- center=center,
390
- )
391
-
392
- # Assign file using Django's File wrapper and save
386
+ # Create model instance via manager so creation can be intercepted/mocked during tests
393
387
  try:
394
388
  with file_path.open("rb") as f:
395
389
  django_file = File(f, name=new_file_name)
396
- raw_pdf.file = django_file # type: ignore # Assign the file object
397
- # Save the instance - Django storage handles the file copy/move
398
- raw_pdf.save()
399
- _file = raw_pdf.file
400
- assert _file is not None
401
- logger.info(f"Created and saved new RawPdfFile {raw_pdf.pk} with file {_file.name}")
402
-
403
- # Verify file exists in storage after save
404
- if not _file.storage.exists(_file.name):
405
- logger.error(f"File was not saved correctly to storage path {_file.name} after model save.")
406
- raise IOError(f"File not found at expected storage path after save: {_file.name}")
407
- # Log the absolute path for debugging if possible (depends on storage)
408
- try:
409
- logger.info(f"File saved to absolute path: {_file.path}")
410
- except NotImplementedError:
411
- logger.info(f"File saved to storage path: {_file.name} (Absolute path not available from storage)")
390
+ raw_pdf = cls.objects.create(
391
+ pdf_hash=pdf_hash,
392
+ center=center,
393
+ file=django_file,
394
+ )
395
+
396
+ _file = raw_pdf.file
397
+ assert _file is not None
398
+ logger.info(
399
+ "Created and saved new RawPdfFile %s with file %s", raw_pdf.pk, _file.name
400
+ )
401
+
402
+ if not _file.storage.exists(_file.name):
403
+ logger.error(
404
+ "File was not saved correctly to storage path %s after model save.",
405
+ _file.name,
406
+ )
407
+ raise IOError(
408
+ f"File not found at expected storage path after save: {_file.name}"
409
+ )
410
+
411
+ try:
412
+ logger.info("File saved to absolute path: %s", _file.path)
413
+ except NotImplementedError:
414
+ logger.info(
415
+ "File saved to storage path: %s (Absolute path not available from storage)",
416
+ _file.name,
417
+ )
412
418
 
413
419
  except Exception as e:
414
- logger.error(f"Error processing or saving file {file_path} for new record: {e}")
415
- # If save failed, the instance might be partially created but not fully saved.
416
- raise # Re-raise the exception
420
+ logger.error("Error processing or saving file %s for new record: %s", file_path, e)
421
+ raise
417
422
 
418
423
  # Delete source file *after* successful save and verification
419
424
  if delete_source:
@@ -6,7 +6,8 @@ from typing import TYPE_CHECKING, Optional, Type
6
6
 
7
7
  # Import the new exceptions from the correct path
8
8
  from endoreg_db.exceptions import InsufficientStorageError, TranscodingError
9
- from ...utils import VIDEO_DIR, TMP_VIDEO_DIR, data_paths
9
+ from ...utils import VIDEO_DIR, TMP_VIDEO_DIR
10
+ from importlib import import_module
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from endoreg_db.models import VideoFile
@@ -170,6 +171,22 @@ def atomic_move_with_fallback(src_path: Path, dst_path: Path) -> bool:
170
171
  raise
171
172
 
172
173
 
174
def _get_data_paths():
    """
    Look up ``data_paths`` on ``endoreg_db.utils`` at call time.

    Resolving the attribute on every call (instead of binding it at import
    time) means a replaced/patched ``data_paths`` object is picked up.
    """
    return import_module("endoreg_db.utils").data_paths
178
+
179
+
180
def _get_path(mapping, key, default):
    """
    Return ``mapping[key]``, or ``default`` when the lookup is not possible.

    Subscript access is used on purpose (rather than ``.get``) so objects that
    only implement ``__getitem__`` are supported. ``default`` is returned when
    ``mapping`` is ``None`` or when the subscript raises ``KeyError`` or
    ``TypeError``.
    """
    if mapping is not None:
        try:
            return mapping[key]
        except (KeyError, TypeError):
            pass
    return default
188
+
189
+
173
190
  def _create_from_file(
174
191
  cls_model: Type["VideoFile"],
175
192
  file_path: Path,
@@ -199,8 +216,12 @@ def _create_from_file(
199
216
 
200
217
  try:
201
218
  # Ensure we operate under the canonical video path root
202
- video_dir = data_paths.get("video", video_dir)
203
- storage_root = Path(video_dir).parent
219
+ data_paths = _get_data_paths()
220
+ resolved_video_dir = _get_path(data_paths, "video", video_dir)
221
+ video_dir = Path(resolved_video_dir)
222
+ storage_root_default = Path(video_dir).parent
223
+ resolved_storage_root = _get_path(data_paths, "storage", storage_root_default)
224
+ storage_root = Path(resolved_storage_root)
204
225
  storage_root.mkdir(parents=True, exist_ok=True)
205
226
 
206
227
  # Check storage capacity before starting any work
@@ -300,7 +321,8 @@ def _create_from_file(
300
321
  # 8. Create the VideoFile instance
301
322
  logger.info("Creating new VideoFile instance with UUID: %s", uuid_val)
302
323
  # Store FileField path relative to storage root including the videos prefix
303
- relative_name = (final_storage_path.relative_to(data_paths['storage'])).as_posix()
324
+ storage_base = Path(_get_path(data_paths, "storage", final_storage_path.parent))
325
+ relative_name = (final_storage_path.relative_to(storage_base)).as_posix()
304
326
  video = cls_model(
305
327
  uuid=uuid_val,
306
328
  raw_file=relative_name,
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  from typing import TYPE_CHECKING, Optional, Dict, List, Tuple
3
3
  from django.db import transaction
4
+ from endoreg_db.helpers.download_segmentation_model import download_segmentation_model
4
5
 
5
6
  # Added imports
6
7
 
@@ -50,6 +51,8 @@ def _pipe_1(
50
51
  if not state.frames_extracted:
51
52
  logger.error("Pipe 1 failed: Frame extraction did not complete successfully.")
52
53
  return False
54
+
55
+
53
56
 
54
57
  # 3. Perform Initial Prediction
55
58
  logger.info(f"Pipe 1: Performing prediction with model '{model_name}'...")
@@ -61,7 +64,16 @@ def _pipe_1(
61
64
  model_meta = ai_model_obj.get_latest_version()
62
65
  except AiModel.DoesNotExist:
63
66
  logger.error(f"Pipe 1 failed: Model '{model_name}' not found.")
64
- return False
67
+ try:
68
+ model_name = download_segmentation_model()
69
+ ai_model_obj = AiModel.objects.get(name=model_name)
70
+ if model_meta_version is not None:
71
+ model_meta = ai_model_obj.metadata_versions.get(version=model_meta_version)
72
+ else:
73
+ model_meta = ai_model_obj.get_latest_version()
74
+ except AiModel.DoesNotExist:
75
+ logger.error(f"Pipe 1 failed: Model '{model_name}' not found.")
76
+ return False
65
77
  except ModelMeta.DoesNotExist:
66
78
  logger.error(
67
79
  f"Pipe 1 failed: ModelMeta version {model_meta_version} for model '{model_name}' not found."
@@ -126,7 +126,7 @@ class VideoFile(models.Model):
126
126
 
127
127
  sensitive_meta = models.OneToOneField(
128
128
  "SensitiveMeta", on_delete=models.SET_NULL, null=True, blank=True, related_name="video_file"
129
- )
129
+ ) # type: ignore
130
130
  center = models.ForeignKey("Center", on_delete=models.PROTECT)
131
131
  processor = models.ForeignKey(
132
132
  "EndoscopyProcessor", on_delete=models.PROTECT, blank=True, null=True
@@ -465,7 +465,18 @@ class VideoFile(models.Model):
465
465
  # Use proper database connection
466
466
  if using is None:
467
467
  using = 'default'
468
-
468
+
469
+ raw_file_path = self.get_raw_file_path()
470
+ if raw_file_path:
471
+ raw_file_path = Path(raw_file_path)
472
+ lock_path = raw_file_path.with_suffix(raw_file_path.suffix + ".lock")
473
+ if lock_path.exists():
474
+ try:
475
+ lock_path.unlink()
476
+ logger.info(f"Removed processing lock: {lock_path}")
477
+ except Exception as e:
478
+ logger.warning(f"Could not remove processing lock {lock_path}: {e}")
479
+
469
480
  try:
470
481
  # Call parent delete with proper parameters
471
482
  super().delete(using=using, keep_parents=keep_parents)
@@ -572,15 +583,28 @@ class VideoFile(models.Model):
572
583
  super().save(*args, **kwargs)
573
584
 
574
585
  def get_or_create_state(self) -> "VideoState":
575
- """
576
- Return the related VideoState instance for this video, creating and assigning a new one if none exists.
577
-
578
- Returns:
579
- VideoState: The associated VideoState instance.
580
- """
581
- if self.state is None:
582
- self.state = VideoState.objects.create()
583
- return self.state
586
+ """Ensure this video has a persisted ``VideoState`` and return it."""
587
+
588
+ state = self.state
589
+
590
+ # When tests reuse cached instances across database flushes, ``state`` may reference
591
+ # a row that no longer exists. Guard against that by validating persistence.
592
+ state_pk = getattr(state, "pk", None)
593
+ if state is not None and state_pk is not None:
594
+ if not VideoState.objects.filter(pk=state_pk).exists():
595
+ state = None
596
+
597
+ if state is None:
598
+ # Create a fresh state to avoid refresh_from_db() failures on unsaved instances.
599
+ state = VideoState.objects.create()
600
+ self.state = state
601
+
602
+ # Persist the relation immediately if the VideoFile already exists in the DB so
603
+ # later refreshes see the association without requiring additional saves.
604
+ if self.pk:
605
+ self.save(update_fields=["state"])
606
+
607
+ return state
584
608
 
585
609
  def get_or_create_sensitive_meta(self) -> "SensitiveMeta":
586
610
  """
@@ -592,8 +616,7 @@ class VideoFile(models.Model):
592
616
  from endoreg_db.models import SensitiveMeta
593
617
  if self.sensitive_meta is None:
594
618
  self.sensitive_meta = SensitiveMeta.objects.create(center = self.center)
595
- # Mark as processed when creating new SensitiveMeta
596
- self.get_or_create_state().mark_sensitive_meta_processed(save=True)
619
+ # Do not mark processed here; it will be set after extraction/validation steps
597
620
  return self.sensitive_meta
598
621
 
599
622
  def get_outside_segments(self, only_validated: bool = False) -> models.QuerySet["LabelVideoSegment"]:
@@ -12,6 +12,7 @@ from django.conf import settings
12
12
 
13
13
  from endoreg_db.utils.hashs import get_video_hash
14
14
  from endoreg_db.utils.validate_endo_roi import validate_endo_roi
15
+ from endoreg_db.utils.paths import STORAGE_DIR
15
16
  from ....utils.video.ffmpeg_wrapper import assemble_video_from_frames
16
17
  from ...utils import anonymize_frame # Import from models.utils
17
18
  from .video_file_segments import _get_outside_frames, _get_outside_frame_numbers
@@ -268,7 +269,7 @@ def _anonymize(video: "VideoFile", delete_original_raw: bool = True) -> bool:
268
269
  raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid}).")
269
270
 
270
271
  video.processed_video_hash = new_processed_hash
271
- video.processed_file.name = video.get_target_anonymized_video_path().relative_to(settings.MEDIA_ROOT).as_posix()
272
+ video.processed_file.name = video.get_target_anonymized_video_path().relative_to(STORAGE_DIR).as_posix()
272
273
 
273
274
  update_fields = [
274
275
  "processed_video_hash",
@@ -97,6 +97,7 @@ def _extract_frame_range(
97
97
  return True # Indicate success as frames are considered present
98
98
 
99
99
  frame_dir.mkdir(parents=True, exist_ok=True)
100
+ extracted_paths = []
100
101
 
101
102
  try:
102
103
  logger.info("Starting frame range extraction [%d, %d) for video %s to %s", start_frame, end_frame, video.uuid, frame_dir)
@@ -111,6 +112,17 @@ def _extract_frame_range(
111
112
 
112
113
  return True
113
114
 
115
+ except FileNotFoundError as err:
116
+ logger.error(
117
+ "Frame range extraction [%d, %d) failed for video %s: %s",
118
+ start_frame,
119
+ end_frame,
120
+ video.uuid,
121
+ err,
122
+ exc_info=True,
123
+ )
124
+ raise
125
+
114
126
  except Exception as e:
115
127
  logger.error("Frame range extraction [%d, %d) or DB update failed for video %s: %s", start_frame, end_frame, video.uuid, e, exc_info=True)
116
128
 
@@ -32,13 +32,15 @@ def _get_raw_file_path(video: "VideoFile") -> Optional[Path]:
32
32
  if sensitive_path.exists():
33
33
  return sensitive_path.resolve()
34
34
 
35
+ # Check direct raw_file.path if available
35
36
  # Check direct raw_file.path if available
36
37
  try:
37
38
  direct_path = Path(video.raw_file.path)
38
39
  if direct_path.exists():
39
40
  return direct_path.resolve()
40
- except Exception:
41
- pass # Fallback to original behavior
41
+ except Exception as e:
42
+ logger.debug("Could not access direct raw_file.path for video %s: %s", video.uuid, e)
43
+ # Fallback to checking alternative paths
42
44
 
43
45
  # Check common alternative paths
44
46
  alternative_paths = [
@@ -13,7 +13,7 @@ else:
13
13
  ENDOREG_CENTER_ID = settings.ENDOREG_CENTER_ID
14
14
 
15
15
  # Import the new utility function
16
- from ...utils.video.ffmpeg_wrapper import get_stream_info
16
+ from ...utils.video import ffmpeg_wrapper
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -214,7 +214,7 @@ class FFMpegMeta(models.Model):
214
214
  """
215
215
  logger.info("Running ffprobe on %s", file_path)
216
216
  try:
217
- probe_data = get_stream_info(file_path) # Use the new utility
217
+ probe_data = ffmpeg_wrapper.get_stream_info(file_path) # Use the new utility
218
218
  except Exception as probe_err:
219
219
  logger.error("ffprobe execution failed for %s: %s", file_path, probe_err, exc_info=True)
220
220
  raise RuntimeError(f"ffprobe execution failed for {file_path}") from probe_err
@@ -28,6 +28,9 @@ class SensitiveMetaValidateSerializer(serializers.Serializer):
28
28
  patient_gender = serializers.CharField(required=False, allow_blank=True)
29
29
  center_name = serializers.CharField(required=False, allow_blank=True)
30
30
  is_verified = serializers.BooleanField(required=False, default=True)
31
+ file_type = serializers.ChoiceField(
32
+ choices=['video', 'pdf'], required=False
33
+ ) # Optional: "video" oder "pdf"
31
34
 
32
35
  def validate_patient_dob(self, value):
33
36
  """