endoreg-db: endoreg_db-0.8.3.3-py3-none-any.whl → endoreg_db-0.8.6.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (41):
  1. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +23 -1
  2. endoreg_db/data/setup_config.yaml +38 -0
  3. endoreg_db/management/commands/create_model_meta_from_huggingface.py +1 -2
  4. endoreg_db/management/commands/load_ai_model_data.py +18 -15
  5. endoreg_db/management/commands/setup_endoreg_db.py +218 -33
  6. endoreg_db/models/media/pdf/raw_pdf.py +241 -97
  7. endoreg_db/models/media/video/pipe_1.py +30 -33
  8. endoreg_db/models/media/video/video_file.py +300 -187
  9. endoreg_db/models/medical/hardware/endoscopy_processor.py +10 -1
  10. endoreg_db/models/metadata/model_meta_logic.py +34 -45
  11. endoreg_db/models/metadata/sensitive_meta_logic.py +555 -150
  12. endoreg_db/serializers/__init__.py +26 -55
  13. endoreg_db/serializers/misc/__init__.py +1 -1
  14. endoreg_db/serializers/misc/file_overview.py +65 -35
  15. endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
  16. endoreg_db/serializers/video_examination.py +198 -0
  17. endoreg_db/services/lookup_service.py +228 -58
  18. endoreg_db/services/lookup_store.py +174 -30
  19. endoreg_db/services/pdf_import.py +585 -282
  20. endoreg_db/services/video_import.py +493 -240
  21. endoreg_db/urls/__init__.py +36 -23
  22. endoreg_db/urls/label_video_segments.py +2 -0
  23. endoreg_db/urls/media.py +103 -66
  24. endoreg_db/utils/setup_config.py +177 -0
  25. endoreg_db/views/__init__.py +5 -3
  26. endoreg_db/views/media/pdf_media.py +3 -1
  27. endoreg_db/views/media/video_media.py +1 -1
  28. endoreg_db/views/media/video_segments.py +187 -259
  29. endoreg_db/views/pdf/__init__.py +5 -8
  30. endoreg_db/views/pdf/pdf_stream.py +186 -0
  31. endoreg_db/views/pdf/reimport.py +110 -94
  32. endoreg_db/views/requirement/lookup.py +171 -287
  33. endoreg_db/views/video/__init__.py +0 -2
  34. endoreg_db/views/video/video_examination_viewset.py +202 -289
  35. {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/METADATA +1 -2
  36. {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/RECORD +38 -37
  37. endoreg_db/views/pdf/pdf_media.py +0 -239
  38. endoreg_db/views/pdf/pdf_stream_views.py +0 -127
  39. endoreg_db/views/video/video_media.py +0 -158
  40. {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/WHEEL +0 -0
  41. {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,96 +1,100 @@
1
1
  """Concrete model for video files, handling both raw and processed states."""
2
2
 
3
3
  import logging
4
- from pathlib import Path
4
+ import os
5
5
  import uuid
6
+ from pathlib import Path
6
7
  from typing import TYPE_CHECKING, Optional, Union, cast
7
8
 
8
- from django.db import models
9
9
  from django.core.files import File
10
- from django.db.models.fields.files import FieldFile
11
10
  from django.core.validators import FileExtensionValidator
11
+ from django.db import models
12
12
  from django.db.models import F
13
+ from django.db.models.fields.files import FieldFile
14
+
13
15
  from endoreg_db.utils.calc_duration_seconds import _calc_duration_vf
14
16
 
17
+ from ...label import Label, LabelVideoSegment
18
+ from ...state import VideoState
19
+ from ...utils import ANONYM_VIDEO_DIR, VIDEO_DIR
20
+
15
21
  # --- Import model-specific function modules ---
16
22
  from .create_from_file import _create_from_file
23
+ from .pipe_1 import _pipe_1, _test_after_pipe_1
24
+ from .pipe_2 import _pipe_2
25
+ from .video_file_ai import _extract_text_from_video_frames, _predict_video_pipeline
17
26
  from .video_file_anonymize import (
18
27
  _anonymize,
19
- _create_anonymized_frame_files,
20
28
  _cleanup_raw_assets,
21
- )
22
- from .video_file_meta import (
23
- _update_text_metadata,
24
- _update_video_meta,
25
- _get_fps,
26
- _get_endo_roi,
27
- _get_crop_template,
28
- _initialize_video_specs,
29
+ _create_anonymized_frame_files,
29
30
  )
30
31
  from .video_file_frames import (
31
- _extract_frames,
32
- _initialize_frames,
32
+ _bulk_create_frames,
33
+ _create_frame_object,
33
34
  _delete_frames,
35
+ _extract_frames,
36
+ _get_frame,
37
+ _get_frame_number,
34
38
  _get_frame_path,
35
39
  _get_frame_paths,
36
- _get_frame_number,
37
- _get_frames,
38
- _get_frame,
39
40
  _get_frame_range,
40
- _create_frame_object,
41
- _bulk_create_frames,
41
+ _get_frames,
42
+ _initialize_frames,
42
43
  )
44
+
43
45
  # Update import aliases for clarity and to use as helpers
44
- from .video_file_frames._manage_frame_range import _extract_frame_range as _extract_frame_range_helper
45
- from .video_file_frames._manage_frame_range import _delete_frame_range as _delete_frame_range_helper
46
+ from .video_file_frames._manage_frame_range import (
47
+ _delete_frame_range as _delete_frame_range_helper,
48
+ )
49
+ from .video_file_frames._manage_frame_range import (
50
+ _extract_frame_range as _extract_frame_range_helper,
51
+ )
46
52
  from .video_file_io import (
47
53
  _delete_with_file,
48
54
  _get_base_frame_dir,
49
- _set_frame_dir,
50
55
  _get_frame_dir_path,
51
- _get_temp_anonymized_frame_dir,
52
- _get_target_anonymized_video_path,
53
- _get_raw_file_path,
54
56
  _get_processed_file_path,
57
+ _get_raw_file_path,
58
+ _get_target_anonymized_video_path,
59
+ _get_temp_anonymized_frame_dir,
60
+ _set_frame_dir,
55
61
  )
56
- from .video_file_ai import (
57
- _predict_video_pipeline,
58
- _extract_text_from_video_frames,
62
+ from .video_file_meta import (
63
+ _get_crop_template,
64
+ _get_endo_roi,
65
+ _get_fps,
66
+ _initialize_video_specs,
67
+ _update_text_metadata,
68
+ _update_video_meta,
59
69
  )
60
70
 
61
- from .pipe_1 import _pipe_1, _test_after_pipe_1
62
- from .pipe_2 import _pipe_2
63
-
64
- from ...utils import VIDEO_DIR, ANONYM_VIDEO_DIR
65
- from ...state import VideoState
66
- from ...label import LabelVideoSegment, Label
67
-
68
-
69
71
  # Configure logging
70
72
  logger = logging.getLogger(__name__) # Changed from "video_file"
71
73
 
72
74
  if TYPE_CHECKING:
73
75
  from endoreg_db.models import (
74
76
  Center,
77
+ EndoscopyProcessor,
78
+ FFMpegMeta,
75
79
  Frame,
80
+ ModelMeta,
81
+ Patient,
82
+ PatientExamination,
76
83
  SensitiveMeta,
77
- EndoscopyProcessor,
84
+ VideoImportMeta,
78
85
  VideoMeta,
79
- PatientExamination,
80
- Patient,
81
86
  VideoState,
82
- ModelMeta,
83
- VideoImportMeta,
84
- FFMpegMeta,
85
- )
87
+ )
88
+
89
+
86
90
  class VideoQuerySet(models.QuerySet):
87
91
  def next_after(self, last_id=None):
88
92
  """
89
93
  Return the next VideoFile instance with a primary key greater than the given last_id.
90
-
94
+
91
95
  Parameters:
92
96
  last_id (int or None): The primary key to start after. If None or invalid, returns the first instance.
93
-
97
+
94
98
  Returns:
95
99
  VideoFile or None: The next VideoFile instance, or None if not found.
96
100
  """
@@ -102,9 +106,10 @@ class VideoQuerySet(models.QuerySet):
102
106
  q = self if last_id is None else self.filter(pk__gt=last_id)
103
107
  return q.order_by("pk").first()
104
108
 
109
+
105
110
  class VideoFile(models.Model):
106
111
  uuid = models.UUIDField(default=uuid.uuid4, editable=False, unique=True)
107
-
112
+
108
113
  objects = VideoQuerySet.as_manager()
109
114
 
110
115
  raw_file = models.FileField(
@@ -120,55 +125,81 @@ class VideoFile(models.Model):
120
125
  blank=True,
121
126
  )
122
127
 
123
- video_hash = models.CharField(max_length=255, unique=True, help_text="Hash of the raw video file.")
128
+ video_hash = models.CharField(
129
+ max_length=255, unique=True, help_text="Hash of the raw video file."
130
+ )
124
131
  processed_video_hash = models.CharField(
125
- max_length=255, unique=True, null=True, blank=True, help_text="Hash of the processed video file, unique if not null."
132
+ max_length=255,
133
+ unique=True,
134
+ null=True,
135
+ blank=True,
136
+ help_text="Hash of the processed video file, unique if not null.",
126
137
  )
127
138
 
128
139
  sensitive_meta = models.OneToOneField(
129
- "SensitiveMeta", on_delete=models.SET_NULL, null=True, blank=True, related_name="video_file"
130
- ) # type: ignore
131
- center = models.ForeignKey("Center", on_delete=models.PROTECT) # type: ignore
140
+ "SensitiveMeta",
141
+ on_delete=models.SET_NULL,
142
+ null=True,
143
+ blank=True,
144
+ related_name="video_file",
145
+ ) # type: ignore
146
+ center = models.ForeignKey("Center", on_delete=models.PROTECT) # type: ignore
132
147
  processor = models.ForeignKey(
133
148
  "EndoscopyProcessor", on_delete=models.PROTECT, blank=True, null=True
134
- ) # type: ignore
149
+ ) # type: ignore
135
150
  video_meta = models.OneToOneField(
136
- "VideoMeta", on_delete=models.SET_NULL, null=True, blank=True, related_name="video_file"
137
- ) # type: ignore
151
+ "VideoMeta",
152
+ on_delete=models.SET_NULL,
153
+ null=True,
154
+ blank=True,
155
+ related_name="video_file",
156
+ ) # type: ignore
138
157
  examination = models.ForeignKey(
139
158
  "PatientExamination",
140
159
  on_delete=models.SET_NULL,
141
160
  blank=True,
142
161
  null=True,
143
162
  related_name="video_files",
144
- ) # type: ignore
163
+ ) # type: ignore
145
164
  patient = models.ForeignKey(
146
165
  "Patient",
147
166
  on_delete=models.SET_NULL,
148
167
  blank=True,
149
168
  null=True,
150
169
  related_name="video_files",
151
- ) # type: ignore
170
+ ) # type: ignore
152
171
  ai_model_meta = models.ForeignKey(
153
172
  "ModelMeta", on_delete=models.SET_NULL, blank=True, null=True
154
- ) # type: ignore
173
+ ) # type: ignore
155
174
  state = models.OneToOneField(
156
- "VideoState", on_delete=models.SET_NULL, null=True, blank=True, related_name="video_file"
157
- ) # type: ignore
175
+ "VideoState",
176
+ on_delete=models.SET_NULL,
177
+ null=True,
178
+ blank=True,
179
+ related_name="video_file",
180
+ ) # type: ignore
158
181
  import_meta = models.OneToOneField(
159
182
  "VideoImportMeta", on_delete=models.CASCADE, blank=True, null=True
160
- ) # type: ignore
183
+ ) # type: ignore
161
184
 
162
185
  original_file_name = models.CharField(max_length=255, blank=True, null=True)
163
186
  uploaded_at = models.DateTimeField(auto_now_add=True)
164
- frame_dir = models.CharField(max_length=512, blank=True, help_text="Path to frames extracted from the raw video.")
187
+ frame_dir = models.CharField(
188
+ max_length=512,
189
+ blank=True,
190
+ help_text="Path to frames extracted from the raw video.",
191
+ )
165
192
  fps = models.FloatField(blank=True, null=True)
166
193
  duration = models.FloatField(blank=True, null=True)
167
194
  frame_count = models.IntegerField(blank=True, null=True)
168
195
  width = models.IntegerField(blank=True, null=True)
169
196
  height = models.IntegerField(blank=True, null=True)
170
197
  suffix = models.CharField(max_length=10, blank=True, null=True)
171
- sequences = models.JSONField(default=dict, blank=True, help_text="AI prediction sequences based on raw frames.")
198
+ sequences = models.JSONField(
199
+ default=dict,
200
+ blank=True,
201
+ help_text="AI prediction sequences based on raw frames.",
202
+ )
172
203
  date = models.DateField(blank=True, null=True)
173
204
  meta = models.JSONField(blank=True, null=True)
174
205
  date_created = models.DateTimeField(auto_now_add=True)
@@ -187,16 +218,16 @@ class VideoFile(models.Model):
187
218
  ai_model_meta: "ModelMeta"
188
219
  import_meta: "VideoImportMeta"
189
220
 
190
-
191
221
  @property
192
222
  def ffmpeg_meta(self) -> "FFMpegMeta":
193
223
  """
194
224
  Return the associated FFMpegMeta instance for this video, initializing video specs if necessary.
195
-
225
+
196
226
  Returns:
197
227
  FFMpegMeta: The FFMpegMeta object containing metadata for this video.
198
228
  """
199
229
  from endoreg_db.models import FFMpegMeta
230
+
200
231
  if self.video_meta is not None:
201
232
  if self.video_meta.ffmpeg_meta is not None:
202
233
  return self.video_meta.ffmpeg_meta
@@ -207,47 +238,26 @@ class VideoFile(models.Model):
207
238
  assert isinstance(ffmpeg_meta, FFMpegMeta), "Expected FFMpegMeta instance."
208
239
  return ffmpeg_meta
209
240
 
241
+ # Exception message constants
242
+
243
+ NO_ACTIVE_FILE = "Has no raw file"
244
+ NO_FILE_ASSOCIATED = "Active file has no associated file."
210
245
 
211
246
  @property
212
- def active_file_url(self) -> str:
213
- """
214
- Return the URL of the active processed file.
215
-
216
- Returns:
217
- str: The URL of the active video file.
218
-
219
- Raises:
220
- Value Error if no active VideoFile is available.
221
- """
222
- active = self.active_file
223
- if not isinstance(active, FieldFile):
224
- raise ValueError("Active file is not a stored FieldFile instance.")
225
- if not active.name:
226
- raise ValueError("Active file has no associated name.")
227
- return active.url
228
-
229
- @property
230
- def active_raw_file(self) -> FieldFile:
231
- raw = self.raw_file
232
- if isinstance(raw, FieldFile) and raw.name:
233
- return raw
234
- raise ValueError("No raw file available for this video")
235
-
236
- @property
237
- def active_raw_file_url(self)-> str:
238
- """
239
- Return the path of the URL of the active raw file for name reading.
247
+ def active_raw_file(self) -> File:
248
+ """Return the raw file if available, otherwise raise ValueError."""
249
+ if self.has_raw:
250
+ return self.raw_file
251
+ raise ValueError(self.NO_ACTIVE_FILE)
240
252
 
241
- Raises:
242
- ValueError("Active file has no associated file")
243
-
244
- Returns:
245
- """
246
- raw = self.active_raw_file
247
- if not raw.name:
248
- raise ValueError("Active raw file has no associated name.")
249
- return raw.url
250
-
253
+ @property
254
+ def active_raw_file_url(self) -> str:
255
+ """Return the URL of the active raw file, or raise ValueError if unavailable."""
256
+ _file = self.active_raw_file
257
+ assert _file is not None, self.NO_ACTIVE_FILE
258
+ if not _file or not _file.name:
259
+ raise ValueError(self.NO_FILE_ASSOCIATED)
260
+ return _file.url
251
261
 
252
262
  # Pipeline Functions
253
263
  pipe_1 = _pipe_1
@@ -275,35 +285,39 @@ class VideoFile(models.Model):
275
285
  create_frame_object = _create_frame_object
276
286
  bulk_create_frames = _bulk_create_frames
277
287
 
278
-
279
-
280
288
  # Define new methods that call the helper functions
281
- def extract_specific_frame_range(self, start_frame: int, end_frame: int, overwrite: bool = False, **kwargs) -> bool:
289
+ def extract_specific_frame_range(
290
+ self, start_frame: int, end_frame: int, overwrite: bool = False, **kwargs
291
+ ) -> bool:
282
292
  """
283
293
  Extract frames from the video within the specified frame range.
284
-
294
+
285
295
  Parameters:
286
296
  start_frame (int): The starting frame number (inclusive).
287
297
  end_frame (int): The ending frame number (exclusive).
288
298
  overwrite (bool): Whether to overwrite existing frames in the range.
289
-
299
+
290
300
  Returns:
291
301
  bool: True if frame extraction was successful, False otherwise.
292
-
302
+
293
303
  Additional keyword arguments:
294
304
  quality (int, optional): Quality setting for extracted frames.
295
305
  ext (str, optional): File extension for extracted frames.
296
306
  verbose (bool, optional): Whether to enable verbose output.
297
307
  """
298
- quality = kwargs.get('quality', 2)
299
- ext = kwargs.get('ext', "jpg")
300
- verbose = kwargs.get('verbose', False)
308
+ quality = kwargs.get("quality", 2)
309
+ ext = kwargs.get("ext", "jpg")
310
+ verbose = kwargs.get("verbose", False)
301
311
 
302
312
  # Log if unexpected kwargs are passed, beyond those used by the helper
303
- expected_helper_kwargs = {'quality', 'ext', 'verbose'}
304
- unexpected_kwargs = {k: v for k, v in kwargs.items() if k not in expected_helper_kwargs}
313
+ expected_helper_kwargs = {"quality", "ext", "verbose"}
314
+ unexpected_kwargs = {
315
+ k: v for k, v in kwargs.items() if k not in expected_helper_kwargs
316
+ }
305
317
  if unexpected_kwargs:
306
- logger.warning(f"Unexpected keyword arguments for extract_specific_frame_range, will be ignored by helper: {unexpected_kwargs}")
318
+ logger.warning(
319
+ f"Unexpected keyword arguments for extract_specific_frame_range, will be ignored by helper: {unexpected_kwargs}"
320
+ )
307
321
 
308
322
  return _extract_frame_range_helper(
309
323
  video=self,
@@ -312,7 +326,7 @@ class VideoFile(models.Model):
312
326
  quality=quality,
313
327
  overwrite=overwrite,
314
328
  ext=ext,
315
- verbose=verbose
329
+ verbose=verbose,
316
330
  )
317
331
 
318
332
  def delete_specific_frame_range(self, start_frame: int, end_frame: int) -> None:
@@ -320,9 +334,7 @@ class VideoFile(models.Model):
320
334
  Deletes frame files for a specific range [start_frame, end_frame).
321
335
  """
322
336
  _delete_frame_range_helper(
323
- video=self,
324
- start_frame=start_frame,
325
- end_frame=end_frame
337
+ video=self, start_frame=start_frame, end_frame=end_frame
326
338
  )
327
339
 
328
340
  delete_with_file = _delete_with_file
@@ -340,8 +352,6 @@ class VideoFile(models.Model):
340
352
 
341
353
  predict_video = _predict_video_pipeline
342
354
  extract_text_from_frames = _extract_text_from_video_frames
343
-
344
-
345
355
 
346
356
  @classmethod
347
357
  def check_hash_exists(cls, video_hash: str) -> bool:
@@ -360,16 +370,15 @@ class VideoFile(models.Model):
360
370
  Return True if a raw video file is associated with this instance.
361
371
  """
362
372
  return bool(self.raw_file and self.raw_file.name)
363
-
364
373
 
365
374
  @property
366
375
  def active_file(self) -> FieldFile:
367
376
  """
368
377
  Return the active video file, preferring the processed file if available.
369
-
378
+
370
379
  Returns:
371
380
  File: The processed file if present; otherwise, the raw file.
372
-
381
+
373
382
  Raises:
374
383
  ValueError: If neither a processed nor a raw file is available.
375
384
  """
@@ -381,17 +390,18 @@ class VideoFile(models.Model):
381
390
  if isinstance(raw, FieldFile) and raw.name:
382
391
  return raw
383
392
 
384
- raise ValueError("No active file available. VideoFile has neither raw nor processed file.")
385
-
393
+ raise ValueError(
394
+ "No active file available. VideoFile has neither raw nor processed file."
395
+ )
386
396
 
387
397
  @property
388
398
  def active_file_path(self) -> Path:
389
399
  """
390
400
  Return the filesystem path of the active video file.
391
-
401
+
392
402
  Returns:
393
403
  Path: The path to the processed file if available, otherwise the raw file.
394
-
404
+
395
405
  Raises:
396
406
  ValueError: If neither a processed nor raw file is present.
397
407
  """
@@ -401,29 +411,40 @@ class VideoFile(models.Model):
401
411
  elif active is self.raw_file:
402
412
  path = _get_raw_file_path(self)
403
413
  else:
404
- raise ValueError("No active file path available. VideoFile has neither raw nor processed file.")
414
+ raise ValueError(
415
+ "No active file path available. VideoFile has neither raw nor processed file."
416
+ )
405
417
 
406
418
  if path is None:
407
419
  raise ValueError("Active file path could not be resolved.")
408
420
  return path
409
421
 
410
-
411
422
  @classmethod
412
- def create_from_file(cls, file_path: Union[str, Path], center_name: str, **kwargs) -> Optional["VideoFile"]:
423
+ def create_from_file(
424
+ cls, file_path: Union[str, Path], center_name: str, **kwargs
425
+ ) -> Optional["VideoFile"]:
413
426
  # Ensure file_path is a Path object
414
427
  if isinstance(file_path, str):
415
428
  file_path = Path(file_path)
416
429
  # Pass center_name and other kwargs to the helper function
430
+ if not center_name:
431
+ try:
432
+ center_name = os.environ["CENTER_NAME"]
433
+ except KeyError:
434
+ logger.error(
435
+ "Center name must be provided to create VideoFile from file. You can set CENTER_NAME in environment variables."
436
+ )
437
+ return None
417
438
  return _create_from_file(cls, file_path, center_name=center_name, **kwargs)
418
439
 
419
440
  @classmethod
420
441
  def create_from_file_initialized(
421
442
  cls,
422
443
  file_path: Union[str, Path],
423
- center_name:str,
444
+ center_name: str,
424
445
  processor_name: Optional[str] = None,
425
- delete_source:bool = False,
426
- save_video_file:bool = True, # Add this line
446
+ delete_source: bool = False,
447
+ save_video_file: bool = True, # Add this line
427
448
  ):
428
449
  """
429
450
  Creates a VideoFile instance from a given video file path.
@@ -441,16 +462,16 @@ class VideoFile(models.Model):
441
462
  center_name=center_name,
442
463
  processor_name=processor_name,
443
464
  delete_source=delete_source,
444
- save=save_video_file, # Add this line
465
+ save=save_video_file, # Add this line
445
466
  )
446
467
 
447
468
  video_file = video_file.initialize()
448
469
  return video_file
449
-
470
+
450
471
  def delete(self, using=None, keep_parents=False) -> tuple[int, dict[str, int]]:
451
472
  """
452
473
  Delete the VideoFile instance, including associated files and frames.
453
-
474
+
454
475
  Overrides the default delete method to ensure proper cleanup of related resources.
455
476
  """
456
477
  # Ensure frames are deleted before the main instance
@@ -463,16 +484,18 @@ class VideoFile(models.Model):
463
484
  # Delete associated files if they exist
464
485
  if active_path.exists():
465
486
  active_path.unlink(missing_ok=True)
466
-
487
+
467
488
  # Delete file storage
468
489
  if self.raw_file and self.raw_file.storage.exists(self.raw_file.name):
469
490
  self.raw_file.storage.delete(self.raw_file.name)
470
- if self.processed_file and self.processed_file.storage.exists(self.processed_file.name):
491
+ if self.processed_file and self.processed_file.storage.exists(
492
+ self.processed_file.name
493
+ ):
471
494
  self.processed_file.storage.delete(self.processed_file.name)
472
-
495
+
473
496
  # Use proper database connection
474
497
  if using is None:
475
- using = 'default'
498
+ using = "default"
476
499
 
477
500
  raw_file_path = self.get_raw_file_path()
478
501
  if raw_file_path:
@@ -484,7 +507,7 @@ class VideoFile(models.Model):
484
507
  logger.info(f"Removed processing lock: {lock_path}")
485
508
  except Exception as e:
486
509
  logger.warning(f"Could not remove processing lock {lock_path}: {e}")
487
-
510
+
488
511
  try:
489
512
  # Call parent delete with proper parameters
490
513
  result = super().delete(using=using, keep_parents=keep_parents)
@@ -494,41 +517,77 @@ class VideoFile(models.Model):
494
517
  logger.error(f"Error deleting VideoFile {self.uuid}: {e}")
495
518
  raise
496
519
 
497
- def validate_metadata_annotation(self, extracted_data_dict: Optional[dict] = None) -> bool:
520
+ def validate_metadata_annotation(
521
+ self, extracted_data_dict: Optional[dict] = None
522
+ ) -> bool:
498
523
  """
499
524
  Validate the metadata of the VideoFile instance.
500
-
501
- Called after annotation in the frontend, this method deletes the associated active file, updates the sensitive meta data with the user annotated data.
502
- It also ensures the video file is properly saved after the metadata update.
525
+
526
+ Called after annotation in the frontend, this method:
527
+ 1. Updates sensitive metadata with user-annotated data
528
+ 2. Deletes the RAW video file (keeping only the anonymized version)
529
+ 3. Marks the video as validated
530
+
531
+ **IMPORTANT:** Only the raw video is deleted. The processed (anonymized)
532
+ video is preserved as the final validated output.
503
533
  """
534
+ from datetime import date as dt_date
535
+
504
536
  from endoreg_db.models import SensitiveMeta
537
+
505
538
  if not self.sensitive_meta:
506
- self.sensitive_meta = SensitiveMeta.objects.create(center=self.center)
507
-
508
- # Delete the active file to ensure it is reprocessed with the new metadata
509
- if self.active_file_path.exists():
510
- self.active_file_path.unlink(missing_ok=True)
511
-
512
- # Update sensitive metadata with user annotations
513
- sensitive_meta = _update_text_metadata(self, extracted_data_dict, overwrite=True)
514
-
539
+ # CRITICAL FIX: Use create_from_dict with default patient data
540
+ default_data = {
541
+ "patient_first_name": "Patient",
542
+ "patient_last_name": "Unknown",
543
+ "patient_dob": dt_date(1990, 1, 1),
544
+ "examination_date": dt_date.today(),
545
+ "center": self.center,
546
+ }
547
+ self.sensitive_meta = SensitiveMeta.create_from_dict(default_data)
548
+
549
+ # CRITICAL FIX: Delete RAW video file, not the processed (anonymized) one
550
+ # CRITICAL: Update metadata BEFORE deleting raw video
551
+ # Metadata update may trigger frame extraction, which needs raw video
552
+ sensitive_meta = _update_text_metadata(
553
+ self, extracted_data_dict, overwrite=True
554
+ )
555
+
556
+ # After validation and metadata update, only the anonymized video should remain
557
+ from .video_file_io import _get_raw_file_path
558
+
559
+ raw_path = _get_raw_file_path(self)
560
+ if raw_path and raw_path.exists():
561
+ logger.info(f"Deleting raw video file after validation: {raw_path}")
562
+ raw_path.unlink(missing_ok=True)
563
+ # Clear the raw_file field in database (use delete() to avoid save issues)
564
+ if self.raw_file:
565
+ self.raw_file.delete(save=False)
566
+ logger.info(
567
+ f"Raw video deleted for {self.uuid}. Anonymized video preserved."
568
+ )
569
+ else:
570
+ logger.warning(f"Raw video file not found for deletion: {self.uuid}")
571
+
515
572
  if sensitive_meta:
516
573
  # Mark as processed after validation
517
574
  self.get_or_create_state().mark_sensitive_meta_processed(save=True)
518
575
  # Save the VideoFile instance to persist changes
519
576
  self.save()
520
- logger.info(f"Metadata annotation validated and saved for video {self.uuid}.")
577
+ logger.info(
578
+ f"Metadata annotation validated and saved for video {self.uuid}."
579
+ )
521
580
  return True
522
581
  else:
523
- logger.error(f"Failed to validate metadata annotation for video {self.uuid}.")
582
+ logger.error(
583
+ f"Failed to validate metadata annotation for video {self.uuid}."
584
+ )
524
585
  return False
525
-
526
-
527
-
586
+
528
587
  def initialize(self):
529
588
  """
530
589
  Initialize the VideoFile instance by updating metadata, setting up video specs, assigning frame directory, ensuring related state and sensitive metadata exist, saving the instance, and initializing frames.
531
-
590
+
532
591
  Returns:
533
592
  VideoFile: The initialized VideoFile instance.
534
593
  """
@@ -548,7 +607,6 @@ class VideoFile(models.Model):
548
607
  # Initialize frames based on the video specs
549
608
  self.initialize_frames()
550
609
 
551
-
552
610
  return self
553
611
 
554
612
  def __str__(self):
@@ -557,7 +615,9 @@ class VideoFile(models.Model):
557
615
  """
558
616
  active_path = self.active_file_path
559
617
  file_name = active_path.name if active_path else "No file"
560
- state = "Processed" if self.is_processed else ("Raw" if self.has_raw else "No File")
618
+ state = (
619
+ "Processed" if self.is_processed else ("Raw" if self.has_raw else "No File")
620
+ )
561
621
  return f"VideoFile ({state}): {file_name} (UUID: {self.uuid})"
562
622
 
563
623
  # --- Convenience state/meta helpers used in tests and admin workflows ---
@@ -586,7 +646,7 @@ class VideoFile(models.Model):
586
646
  # Now call the original save method
587
647
  """
588
648
  Saves the VideoFile instance to the database.
589
-
649
+
590
650
  Overrides the default save method to persist changes to the VideoFile model.
591
651
  """
592
652
  super().save(*args, **kwargs)
@@ -618,23 +678,71 @@ class VideoFile(models.Model):
618
678
  def get_or_create_sensitive_meta(self) -> "SensitiveMeta":
619
679
  """
620
680
  Retrieve the associated SensitiveMeta instance for this video, creating and assigning one if it does not exist.
621
-
681
+
682
+ **Two-Phase Patient Data Pattern:**
683
+ This method implements a two-phase approach to handle incomplete patient data:
684
+
685
+ **Phase 1: Initial Creation (with defaults)**
686
+ - Creates SensitiveMeta with default patient data to prevent hash calculation errors
687
+ - Default values: patient_first_name="Patient", patient_last_name="Unknown", patient_dob=1990-01-01
688
+ - Allows video import to proceed even without extracted patient data
689
+ - Temporary hash and pseudo-entities are created
690
+
691
+ **Phase 2: Update (with extracted data)**
692
+ - Real patient data is extracted later (e.g., from video OCR via lx_anonymizer)
693
+ - update_from_dict() is called with actual patient information
694
+ - Hash is recalculated automatically using real data
695
+ - Correct pseudo-entities are created/linked based on new hash
696
+
697
+ **Example workflow:**
698
+ ```python
699
+ # Phase 1: Video creation
700
+ video = VideoFile.create_from_file_initialized(...)
701
+ video.initialize() # Calls this method
702
+ # → SensitiveMeta created with defaults
703
+ # → Hash: sha256("Patient Unknown 1990-01-01...")
704
+
705
+ # Phase 2: Frame cleaning extracts real data
706
+ extracted = {"patient_first_name": "Max", "patient_last_name": "Mustermann", ...}
707
+ video.sensitive_meta.update_from_dict(extracted)
708
+ # → Hash: sha256("Max Mustermann 1985-03-15...") (RECALCULATED)
709
+ ```
710
+
622
711
  Returns:
623
712
  SensitiveMeta: The related SensitiveMeta instance.
713
+
714
+ See Also:
715
+ - sensitive_meta_logic.perform_save_logic() for hash calculation details
716
+ - sensitive_meta_logic.update_sensitive_meta_from_dict() for update mechanism
624
717
  """
718
+ from datetime import date as dt_date
719
+
625
720
  from endoreg_db.models import SensitiveMeta
721
+
626
722
  if self.sensitive_meta is None:
627
- self.sensitive_meta = SensitiveMeta.objects.create(center = self.center)
628
- # Do not mark processed here; it will be set after extraction/validation steps
723
+ # Use create_from_dict with default patient data
724
+ # to prevent "First name is required to calculate patient hash" error
725
+ default_data = {
726
+ "patient_first_name": "Patient",
727
+ "patient_last_name": "Unknown",
728
+ "patient_dob": dt_date(1990, 1, 1),
729
+ "examination_date": dt_date.today(),
730
+ "center": self.center,
731
+ }
732
+ self.sensitive_meta = SensitiveMeta.create_from_dict(default_data)
733
+ self.save(update_fields=["sensitive_meta"])
734
+ # Do not mark state as processed here; it will be set after extraction/validation steps
629
735
  return self.sensitive_meta
630
736
 
631
- def get_outside_segments(self, only_validated: bool = False) -> models.QuerySet["LabelVideoSegment"]:
737
+ def get_outside_segments(
738
+ self, only_validated: bool = False
739
+ ) -> models.QuerySet["LabelVideoSegment"]:
632
740
  """
633
741
  Return all video segments labeled as "outside" for this video.
634
-
742
+
635
743
  Parameters:
636
744
  only_validated (bool): If True, only segments with a validated state are included.
637
-
745
+
638
746
  Returns:
639
747
  QuerySet: A queryset of LabelVideoSegment instances labeled as "outside". Returns an empty queryset if the label does not exist or an error occurs.
640
748
  """
@@ -651,43 +759,48 @@ class VideoFile(models.Model):
651
759
  logger.warning("Outside label not found in the database.")
652
760
  return self.label_video_segments.none()
653
761
  except Exception as e:
654
- logger.error("Error getting outside segments for video %s: %s", self.uuid, e, exc_info=True)
762
+ logger.error(
763
+ "Error getting outside segments for video %s: %s",
764
+ self.uuid,
765
+ e,
766
+ exc_info=True,
767
+ )
655
768
  return self.label_video_segments.none()
656
-
769
+
657
770
  @classmethod
658
771
  def get_all_videos(cls) -> models.QuerySet["VideoFile"]:
659
772
  """
660
773
  Returns a queryset containing all VideoFile records.
661
-
774
+
662
775
  This class method retrieves every VideoFile instance in the database without filtering.
663
776
  """
664
777
  return cast(models.QuerySet["VideoFile"], cls.objects.all())
665
-
778
+
666
779
  def count_unmodified_others(self) -> int:
667
780
  """
668
781
  Count the number of other VideoFile instances that have not been modified since creation.
669
-
782
+
670
783
  Returns:
671
784
  int: The count of VideoFile records, excluding this instance, where the modification timestamp matches the creation timestamp.
672
785
  """
673
786
  return (
674
- VideoFile.objects
675
- .filter(date_modified=F('date_created')) # compare the two fields in SQL
676
- .exclude(pk=self.pk) # exclude this instance
677
- .count() # run a fast COUNT(*) on the filtered set
787
+ VideoFile.objects.filter(
788
+ date_modified=F("date_created")
789
+ ) # compare the two fields in SQL
790
+ .exclude(pk=self.pk) # exclude this instance
791
+ .count() # run a fast COUNT(*) on the filtered set
678
792
  )
679
793
 
680
-
681
794
  def frame_number_to_s(self, frame_number: int) -> float:
682
795
  """
683
796
  Convert a frame number to its corresponding time in seconds based on the video's frames per second (FPS).
684
-
797
+
685
798
  Parameters:
686
799
  frame_number (int): The frame number to convert.
687
-
800
+
688
801
  Returns:
689
802
  float: The time in seconds corresponding to the given frame number.
690
-
803
+
691
804
  Raises:
692
805
  ValueError: If the video's FPS is not set or is less than or equal to zero.
693
806
  """
@@ -695,18 +808,18 @@ class VideoFile(models.Model):
695
808
  if fps is None or fps <= 0:
696
809
  raise ValueError("FPS must be set and greater than zero.")
697
810
  return frame_number / fps
698
-
811
+
699
812
  def get_video_by_id(self, video_id: int) -> "VideoFile":
700
813
  """
701
814
  Retrieve a VideoFile instance by its primary key (ID).
702
-
815
+
703
816
  Parameters:
704
817
  video_id (int): The primary key of the VideoFile to retrieve.
705
-
818
+
706
819
  Returns:
707
820
  VideoFile: The VideoFile instance with the specified ID.
708
-
821
+
709
822
  Raises:
710
823
  VideoFile.DoesNotExist: If no VideoFile with the given ID exists.
711
824
  """
712
- return self.objects.get(pk=video_id)
825
+ return self.objects.get(pk=video_id)