endoreg-db 0.8.3.7__py3-none-any.whl → 0.8.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +23 -1
  2. endoreg_db/data/setup_config.yaml +38 -0
  3. endoreg_db/management/commands/create_model_meta_from_huggingface.py +19 -5
  4. endoreg_db/management/commands/load_ai_model_data.py +18 -15
  5. endoreg_db/management/commands/setup_endoreg_db.py +218 -33
  6. endoreg_db/models/media/pdf/raw_pdf.py +241 -97
  7. endoreg_db/models/media/video/pipe_1.py +30 -33
  8. endoreg_db/models/media/video/video_file.py +300 -187
  9. endoreg_db/models/medical/hardware/endoscopy_processor.py +10 -1
  10. endoreg_db/models/metadata/model_meta_logic.py +63 -43
  11. endoreg_db/models/metadata/sensitive_meta_logic.py +251 -25
  12. endoreg_db/serializers/__init__.py +26 -55
  13. endoreg_db/serializers/misc/__init__.py +1 -1
  14. endoreg_db/serializers/misc/file_overview.py +65 -35
  15. endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
  16. endoreg_db/serializers/video_examination.py +198 -0
  17. endoreg_db/services/lookup_service.py +228 -58
  18. endoreg_db/services/lookup_store.py +174 -30
  19. endoreg_db/services/pdf_import.py +585 -282
  20. endoreg_db/services/video_import.py +485 -242
  21. endoreg_db/urls/__init__.py +36 -23
  22. endoreg_db/urls/label_video_segments.py +2 -0
  23. endoreg_db/urls/media.py +3 -2
  24. endoreg_db/utils/setup_config.py +177 -0
  25. endoreg_db/views/__init__.py +5 -3
  26. endoreg_db/views/media/pdf_media.py +3 -1
  27. endoreg_db/views/media/video_media.py +1 -1
  28. endoreg_db/views/media/video_segments.py +187 -259
  29. endoreg_db/views/pdf/__init__.py +5 -8
  30. endoreg_db/views/pdf/pdf_stream.py +187 -0
  31. endoreg_db/views/pdf/reimport.py +110 -94
  32. endoreg_db/views/requirement/lookup.py +171 -287
  33. endoreg_db/views/video/__init__.py +0 -2
  34. endoreg_db/views/video/video_examination_viewset.py +202 -289
  35. {endoreg_db-0.8.3.7.dist-info → endoreg_db-0.8.6.3.dist-info}/METADATA +1 -2
  36. {endoreg_db-0.8.3.7.dist-info → endoreg_db-0.8.6.3.dist-info}/RECORD +38 -37
  37. endoreg_db/views/pdf/pdf_media.py +0 -239
  38. endoreg_db/views/pdf/pdf_stream_views.py +0 -127
  39. endoreg_db/views/video/video_media.py +0 -158
  40. {endoreg_db-0.8.3.7.dist-info → endoreg_db-0.8.6.3.dist-info}/WHEEL +0 -0
  41. {endoreg_db-0.8.3.7.dist-info → endoreg_db-0.8.6.3.dist-info}/licenses/LICENSE +0 -0
@@ -160,7 +160,7 @@ class EndoscopyProcessor(models.Model):
             "height": self.endoscope_sn_height,
         }

-    def get_rois(self) -> dict[ str, dict[str, int | None] | None]:
+    def get_rois(self) -> dict[str, dict[str, int | None] | None]:
         return {
             "endoscope_image": self.get_roi_endoscope_image(),
             "examination_date": self.get_roi_examination_date(),
@@ -171,3 +171,12 @@ class EndoscopyProcessor(models.Model):
             "endoscope_type": self.get_roi_endoscope_type(),
             "endoscope_sn": self.get_roi_endoscopy_sn(),
         }
+
+    def get_sensitive_rois(self) -> dict[str, dict[str, int | None] | None]:
+        return {
+            "examination_date": self.get_roi_examination_date(),
+            "examination_time": self.get_roi_examination_time(),
+            "patient_first_name": self.get_roi_patient_first_name(),
+            "patient_last_name": self.get_roi_patient_last_name(),
+            "patient_dob": self.get_roi_patient_dob(),
+        }
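
The new `get_sensitive_rois()` helper exposes only the regions that carry patient-identifying text (names, DOB, examination date/time), so callers no longer need to filter the full `get_rois()` mapping themselves. A minimal consumer sketch, assuming each ROI dict provides pixel `x`, `y`, `width`, and `height` keys (only `"height"` is visible in this hunk) and that unset ROIs come back as `None`:

```python
import numpy as np

def crop_sensitive_regions(processor, frame: np.ndarray) -> dict[str, np.ndarray]:
    """Hypothetical helper: cut patient-identifying regions out of a raw frame."""
    crops = {}
    for name, roi in processor.get_sensitive_rois().items():
        # Skip ROIs that are missing or not fully configured on this processor.
        if not roi or any(roi.get(key) is None for key in ("x", "y", "width", "height")):
            continue
        x, y, w, h = roi["x"], roi["y"], roi["width"], roi["height"]
        crops[name] = frame[y:y + h, x:x + w]
    return crops
```

Crops like these would typically feed the OCR/anonymization step rather than being persisted.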
@@ -1,41 +1,45 @@
 import shutil
+from logging import getLogger
 from pathlib import Path
-from typing import Optional, TYPE_CHECKING, Any, Type
-from huggingface_hub import hf_hub_download
+from typing import TYPE_CHECKING, Any, Optional, Type
+
+from django.core.files import File
 from django.db import transaction
+from huggingface_hub import hf_hub_download

 # Assuming ModelMeta, AiModel, LabelSet are importable from the correct locations
 # Adjust imports based on your project structure if necessary
 from ..administration.ai.ai_model import AiModel
 from ..label.label_set import LabelSet
-from ..utils import WEIGHTS_DIR, STORAGE_DIR
-
-from logging import getLogger
+from ..utils import STORAGE_DIR, WEIGHTS_DIR

 logger = getLogger("ai_model")

 if TYPE_CHECKING:
-    from .model_meta import ModelMeta # Import ModelMeta for type hinting
+    from .model_meta import ModelMeta  # Import ModelMeta for type hinting
+

+def _get_model_meta_class():
+    """Lazy import to avoid circular imports"""
+    from .model_meta import ModelMeta

-def get_latest_version_number_logic(
-    cls: Type["ModelMeta"], meta_name: str, model_name: str
-) -> int:
+    return ModelMeta
+
+
+def get_latest_version_number_logic(cls: Type["ModelMeta"], meta_name: str, model_name: str) -> int:
     """
     Finds the highest numerical version for a given meta_name and model_name.
     Iterates through all versions, attempts to parse them as integers,
     and returns the maximum integer found. If no numeric versions are found,
     returns 0.
     """
-    versions_qs = cls.objects.filter(
-        name=meta_name, model__name=model_name
-    ).values_list('version', flat=True)
+    versions_qs = cls.objects.filter(name=meta_name, model__name=model_name).values_list("version", flat=True)

     max_v = 0
     found_numeric_version = False

     for v_str in versions_qs:
-        if v_str is None: # Skip None versions
+        if v_str is None:  # Skip None versions
             continue
         try:
             v_int = int(v_str)
@@ -47,13 +51,13 @@ def get_latest_version_number_logic(
                 f"Warning: Could not parse version string '{v_str}' as an integer for "
                 f"meta_name='{meta_name}', model_name='{model_name}' while determining the max version."
             )
-
+
     return max_v if found_numeric_version else 0


 @transaction.atomic
 def create_from_file_logic(
-    cls: Type["ModelMeta"], # cls is ModelMeta
+    cls: Type["ModelMeta"],  # cls is ModelMeta
     meta_name: str,
     model_name: str,
     labelset_name: str,
@@ -84,13 +88,10 @@ def create_from_file_logic(

     if requested_version:
         target_version = str(requested_version)
-        existing = cls.objects.filter(
-            name=meta_name, model=ai_model, version=target_version
-        ).first()
+        existing = cls.objects.filter(name=meta_name, model=ai_model, version=target_version).first()
         if existing and not bump_if_exists:
             raise ValueError(
-                f"ModelMeta '{meta_name}' version '{target_version}' for model '{model_name}' "
-                f"already exists. Use bump_if_exists=True to increment."
+                f"ModelMeta '{meta_name}' version '{target_version}' for model '{model_name}' already exists. Use bump_if_exists=True to increment."
             )
         elif existing and bump_if_exists:
             target_version = str(latest_version_num + 1)
@@ -99,7 +100,6 @@ def create_from_file_logic(
         target_version = str(latest_version_num + 1)
         logger.info(f"Setting next version for {meta_name}/{model_name} to {target_version}")

-
     # --- Prepare Weights File ---
     source_weights_path = Path(weights_file).resolve()
     if not source_weights_path.exists():
@@ -125,8 +125,8 @@ def create_from_file_logic(
     # --- Create/Update ModelMeta Instance ---
     defaults = {
         "labelset": label_set,
-        "weights": relative_dest_path.as_posix(), # Store relative path for FileField
-        **kwargs, # Pass through other fields like activation, mean, std, etc.
+        "weights": relative_dest_path.as_posix(),  # Store relative path for FileField
+        **kwargs,  # Pass through other fields like activation, mean, std, etc.
     }

     # Remove None values from defaults to avoid overriding model defaults unnecessarily
@@ -147,40 +147,44 @@ def create_from_file_logic(
     # --- Optionally update AiModel's active_meta ---
     # You might want to add logic here to automatically set the newly created/updated
     # meta as the active one for the AiModel, e.g.:
-    # ai_model.active_meta = model_meta
-    # ai_model.save()
+    ai_model.active_meta = model_meta
+    ai_model.save()

     return model_meta

+
 # --- Add other logic functions referenced by ModelMeta here ---
 # (get_latest_version_number_logic, get_activation_function_logic, etc.)
 # Placeholder for get_activation_function_logic
 def get_activation_function_logic(activation_name: str):
-    import torch.nn as nn # Import locally as it's specific to this function
+    import torch.nn as nn  # Import locally as it's specific to this function
+
     if activation_name.lower() == "sigmoid":
         return nn.Sigmoid()
     elif activation_name.lower() == "softmax":
         # Note: Softmax usually requires specifying the dimension
-        return nn.Softmax(dim=1) # Assuming dim=1 (channels) is common
+        return nn.Softmax(dim=1)  # Assuming dim=1 (channels) is common
     elif activation_name.lower() == "none":
         return nn.Identity()
     else:
         # Consider adding more activations or raising an error
         raise ValueError(f"Unsupported activation function: {activation_name}")

+
 # Placeholder for get_inference_dataset_config_logic
 def get_inference_dataset_config_logic(model_meta: "ModelMeta") -> dict:
     # This would typically extract relevant fields from model_meta
     # for configuring a dataset during inference
     return {
-        "mean": [float(x) for x in model_meta.mean.split(',')],
-        "std": [float(x) for x in model_meta.std.split(',')],
-        "size_y": model_meta.size_y, # Add size_y key
-        "size_x": model_meta.size_x, # Add size_x key
-        "axes": [int(x) for x in model_meta.axes.split(',')],
+        "mean": [float(x) for x in model_meta.mean.split(",")],
+        "std": [float(x) for x in model_meta.std.split(",")],
+        "size_y": model_meta.size_y,  # Add size_y key
+        "size_x": model_meta.size_x,  # Add size_x key
+        "axes": [int(x) for x in model_meta.axes.split(",")],
         # Add other relevant config like normalization type, etc.
     }

+
 # Placeholder for get_config_dict_logic
 def get_config_dict_logic(model_meta: "ModelMeta") -> dict:
     # Returns a dictionary representation of the model's configuration
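
Since `get_inference_dataset_config_logic()` derives its lists by splitting comma-separated strings, the `mean`, `std`, and `axes` fields on `ModelMeta` are expected to hold plain comma-separated numbers rather than Python list literals (brackets would make `float()` fail). A standalone sketch of the conversion with made-up values (the real field contents depend on the trained model):

```python
# Illustrative values only; actual ModelMeta fields are model-specific.
mean_str = "0.485,0.456,0.406"
std_str = "0.229,0.224,0.225"
axes_str = "2,0,1"

config = {
    "mean": [float(x) for x in mean_str.split(",")],  # -> [0.485, 0.456, 0.406]
    "std": [float(x) for x in std_str.split(",")],    # -> [0.229, 0.224, 0.225]
    "axes": [int(x) for x in axes_str.split(",")],    # -> [2, 0, 1]
}
```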
@@ -202,6 +206,7 @@ def get_config_dict_logic(model_meta: "ModelMeta") -> dict:
         # Add any other relevant fields
     }

+
 # Placeholder for get_model_meta_by_name_version_logic
 def get_model_meta_by_name_version_logic(
     cls: Type["ModelMeta"],
@@ -222,22 +227,21 @@ def get_model_meta_by_name_version_logic(
         try:
             return cls.objects.get(name=meta_name, model=ai_model, version=version)
         except Exception as exc:
-            raise cls.DoesNotExist(
-                f"ModelMeta '{meta_name}' version '{version}' for model '{model_name}' not found."
-            ) from exc
+            raise cls.DoesNotExist(f"ModelMeta '{meta_name}' version '{version}' for model '{model_name}' not found.") from exc
     else:
         # Get latest version
         latest = cls.objects.filter(name=meta_name, model=ai_model).order_by("-date_created").first()
         if latest:
             return latest
         else:
-            raise cls.DoesNotExist(
-                f"No ModelMeta found for '{meta_name}' and model '{model_name}'."
-            )
-
-from huggingface_hub import model_info
+            raise cls.DoesNotExist(f"No ModelMeta found for '{meta_name}' and model '{model_name}'.")
+
+
 import re

+from huggingface_hub import model_info
+
+
 def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
     """
     Infers default model metadata (activation, normalization, input size)
@@ -295,7 +299,8 @@ def infer_default_model_meta_from_hf(model_id: str) -> dict[str, Any]:
         "size_y": size_y,
         "description": f"Inferred defaults for {model_id}",
     }
-
+
+
 def setup_default_from_huggingface_logic(cls, model_id: str, labelset_name: str | None = None):
     """
     Downloads model weights from Hugging Face and auto-fills ModelMeta fields.
@@ -303,10 +308,25 @@ def setup_default_from_huggingface_logic(cls, model_id: str, labelset_name: str
     meta = infer_default_model_meta_from_hf(model_id)

     # Download weights
-    weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin", local_dir=WEIGHTS_DIR)
+    weights_path = hf_hub_download(
+        repo_id=model_id,
+        filename="colo_segmentation_RegNetX800MF_base.ckpt",
+        local_dir=WEIGHTS_DIR,
+    )

     ai_model, _ = AiModel.objects.get_or_create(name=meta["name"])
-    labelset = LabelSet.objects.first() if not labelset_name else LabelSet.objects.get(name=labelset_name)
+    if not labelset_name:
+        labelset = LabelSet.objects.first()
+        if not labelset:
+            raise ValueError("No labelset found and no labelset_name provided")
+    else:
+        labelset = LabelSet.objects.get(name=labelset_name)
+
+    ModelMeta = _get_model_meta_class()
+    model_meta = ModelMeta.objects.filter(name=meta["name"], model=ai_model).first()
+    if model_meta:
+        logger.info(f"ModelMeta {meta['name']} for model {ai_model.name} already exists. Skipping creation.")
+        return model_meta

     return create_from_file_logic(
         cls,
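
Two behavioural changes in `setup_default_from_huggingface_logic` are worth noting: it now raises when no `LabelSet` can be resolved instead of proceeding with `None`, and it returns an already-existing `ModelMeta` of the same name and model rather than creating a duplicate. A hedged usage sketch (the repo id and labelset name below are placeholders, and the `ModelMeta` import path is inferred from the package layout):

```python
from endoreg_db.models.metadata.model_meta import ModelMeta  # path inferred
from endoreg_db.models.metadata.model_meta_logic import (
    setup_default_from_huggingface_logic,
)

kwargs = dict(
    model_id="acme/colo-segmentation-model",             # placeholder HF repo id
    labelset_name="default_multilabel_classification",   # placeholder labelset name
)

meta = setup_default_from_huggingface_logic(ModelMeta, **kwargs)
# A second call resolves to the existing ModelMeta instead of creating a new one.
assert setup_default_from_huggingface_logic(ModelMeta, **kwargs).pk == meta.pk
```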
@@ -162,6 +162,9 @@ def calculate_patient_hash(instance: "SensitiveMeta", salt: str = SECRET_SALT) -
     if not center:
         raise ValueError("Center is required to calculate patient hash.")

+    assert first_name is not None, "First name is required to calculate patient hash."
+    assert last_name is not None, "Last name is required to calculate patient hash."
+
     hash_str = get_patient_hash(
         first_name=first_name,
         last_name=last_name,
@@ -208,11 +211,11 @@ def create_pseudo_examiner_logic(instance: "SensitiveMeta") -> "Examiner":

     if not first_name or not last_name or not center:
         logger.warning(
-            f"Incomplete examiner info for SensitiveMeta (pk={instance.pk}). Using default examiner."
+            f"Incomplete examiner info for SensitiveMeta (pk={instance.pk or 'new'}). Using default examiner."
         )
         # Ensure default center exists or handle appropriately
         try:
-            default_center = Center.objects.get_by_natural_key("endoreg_db_demo")
+            default_center = Center.objects.get(name="endoreg_db_demo")
         except Center.DoesNotExist:
             logger.error(
                 "Default center 'endoreg_db_demo' not found. Cannot create default examiner."
@@ -287,7 +290,53 @@ def perform_save_logic(instance: "SensitiveMeta") -> "Examiner":
     """
     Contains the core logic for preparing a SensitiveMeta instance for saving.
     Handles data generation (dates), hash calculation, and linking pseudo-entities.
-    Returns the Examiner instance to be linked via M2M after the main save.
+
+    This function is called on every save() operation and implements a two-phase approach:
+
+    **Phase 1: Initial Creation (with defaults)**
+    - When a SensitiveMeta is first created (e.g., via get_or_create_sensitive_meta()),
+      it may have missing patient data (names, DOB, etc.)
+    - Default values are set to prevent hash calculation errors:
+        * patient_first_name: "unknown"
+        * patient_last_name: "unknown"
+        * patient_dob: random date (1920-2000)
+    - A temporary hash is calculated using these defaults
+    - Temporary pseudo-entities (Patient, Examination) are created
+
+    **Phase 2: Update (with extracted data)**
+    - When real patient data is extracted (e.g., from video OCR via lx_anonymizer),
+      update_from_dict() is called with actual values
+    - The instance fields are updated with real data (names, DOB, etc.)
+    - save() is called again, triggering this function
+    - Default-setting logic is skipped (fields are no longer empty)
+    - Hash is RECALCULATED with real data
+    - New pseudo-entities are created/retrieved based on new hash
+
+    **Example Flow:**
+    ```
+    # Initial creation
+    sm = SensitiveMeta.create_from_dict({"center": center})
+    # → patient_first_name = "unknown", patient_last_name = "unknown"
+    # → hash = sha256("unknown unknown 1990-01-01 ...")
+    # → pseudo_patient_temp created
+
+    # Later update with extracted data
+    sm.update_from_dict({"patient_first_name": "Max", "patient_last_name": "Mustermann"})
+    # → patient_first_name = "Max", patient_last_name = "Mustermann" (overwrites)
+    # → save() triggered → perform_save_logic() called again
+    # → Default-setting skipped (names already exist)
+    # → hash = sha256("Max Mustermann 1985-03-15 ...") (RECALCULATED)
+    # → pseudo_patient_real created/retrieved with new hash
+    ```
+
+    Args:
+        instance: The SensitiveMeta instance being saved
+
+    Returns:
+        Examiner: The pseudo examiner instance to be linked via M2M after save
+
+    Raises:
+        ValueError: If required fields (center, gender) cannot be determined
     """

     # --- Pre-Save Checks and Data Generation ---
@@ -295,12 +344,12 @@ def perform_save_logic(instance: "SensitiveMeta") -> "Examiner":
     # 1. Ensure DOB and Examination Date exist
     if not instance.patient_dob:
         logger.debug(
-            f"SensitiveMeta (pk={instance.pk}): Patient DOB missing, generating random."
+            f"SensitiveMeta (pk={instance.pk or 'new'}): Patient DOB missing, generating random."
         )
         instance.patient_dob = generate_random_dob()
     if not instance.examination_date:
         logger.debug(
-            f"SensitiveMeta (pk={instance.pk}): Examination date missing, generating random."
+            f"SensitiveMeta (pk={instance.pk or 'new'}): Examination date missing, generating random."
         )
         instance.examination_date = generate_random_examination_date()

@@ -308,18 +357,70 @@ def perform_save_logic(instance: "SensitiveMeta") -> "Examiner":
     if not instance.center:
         raise ValueError("Center must be set before saving SensitiveMeta.")

+    # 2.5 CRITICAL: Set default patient names BEFORE hash calculation
+    #
+    # **Why this is necessary:**
+    # Hash calculation (step 4) requires first_name and last_name to be non-None.
+    # However, on initial creation (e.g., via get_or_create_sensitive_meta()), these
+    # fields may be empty because real patient data hasn't been extracted yet.
+    #
+    # **Two-phase approach:**
+    # - Phase 1 (Initial): Set defaults if names are missing
+    #   → Allows hash calculation to succeed without errors
+    #   → Creates temporary pseudo-entities with default hash
+    #
+    # - Phase 2 (Update): Real data extraction (OCR, manual input)
+    #   → update_from_dict() sets real names ("Max", "Mustermann")
+    #   → save() is called again
+    #   → This block is SKIPPED (names already exist)
+    #   → Hash is recalculated with real data (step 4)
+    #   → New pseudo-entities created with correct hash
+    #
+    # **Example:**
+    # Initial: patient_first_name = "unknown" → hash = sha256("unknown unknown...")
+    # Updated: patient_first_name = "Max" → hash = sha256("Max Mustermann...")
+    #
+    if not instance.patient_first_name:
+        instance.patient_first_name = DEFAULT_UNKNOWN_NAME
+        logger.debug(
+            "SensitiveMeta (pk=%s): Patient first name missing, set to default '%s'.",
+            instance.pk or "new",
+            DEFAULT_UNKNOWN_NAME,
+        )
+
+    if not instance.patient_last_name:
+        instance.patient_last_name = DEFAULT_UNKNOWN_NAME
+        logger.debug(
+            "SensitiveMeta (pk=%s): Patient last name missing, set to default '%s'.",
+            instance.pk or "new",
+            DEFAULT_UNKNOWN_NAME,
+        )
+
     # 3. Ensure Gender exists (should be set before calling save, e.g., during creation/update)
     if not instance.patient_gender:
-        # Attempt to guess if names are available
-        first_name = instance.patient_first_name or DEFAULT_UNKNOWN_NAME
-        gender = guess_name_gender(first_name)
-        if not gender:
+        # Use the now-guaranteed first_name for gender guessing
+        first_name = instance.patient_first_name
+        gender_str = guess_name_gender(first_name)
+        if not gender_str:
             raise ValueError(
                 "Patient gender could not be determined and must be set before saving."
             )
-        instance.patient_gender = gender
+        # Convert string to Gender object
+        try:
+            gender_obj = Gender.objects.get(name=gender_str)
+            instance.patient_gender = gender_obj
+        except Gender.DoesNotExist:
+            raise ValueError(f"Gender '{gender_str}' not found in database.")

     # 4. Calculate Hashes (depends on DOB, Exam Date, Center, Names)
+    #
+    # **IMPORTANT: Hashes are RECALCULATED on every save!**
+    # This enables the two-phase update pattern:
+    # - Initial save: Hash based on default "unknown unknown" names
+    # - Updated save: Hash based on real extracted names ("Max Mustermann")
+    #
+    # The new hash will link to different pseudo-entities, ensuring proper
+    # anonymization while maintaining referential integrity.
     instance.patient_hash = calculate_patient_hash(instance)
     instance.examination_hash = calculate_examination_hash(instance)

@@ -347,7 +448,50 @@ def perform_save_logic(instance: "SensitiveMeta") -> "Examiner":
 def create_sensitive_meta_from_dict(
     cls: Type["SensitiveMeta"], data: Dict[str, Any]
 ) -> "SensitiveMeta":
-    """Logic to create a SensitiveMeta instance from a dictionary."""
+    """
+    Create a SensitiveMeta instance from a dictionary.
+
+    **Center handling:**
+    This function accepts TWO ways to specify the center:
+    1. `center` (Center object) - Directly pass a Center instance
+    2. `center_name` (string) - Pass the center name as a string (will be resolved to Center object)
+
+    At least ONE of these must be provided.
+
+    **Example usage:**
+    ```python
+    # Option 1: With Center object
+    data = {
+        "patient_first_name": "Patient",
+        "patient_last_name": "Unknown",
+        "patient_dob": date(1990, 1, 1),
+        "examination_date": date.today(),
+        "center": center_obj,  # ← Center object
+    }
+    sm = SensitiveMeta.create_from_dict(data)
+
+    # Option 2: With center name string
+    data = {
+        "patient_first_name": "Patient",
+        "patient_last_name": "Unknown",
+        "patient_dob": date(1990, 1, 1),
+        "examination_date": date.today(),
+        "center_name": "university_hospital_wuerzburg",  # ← String
+    }
+    sm = SensitiveMeta.create_from_dict(data)
+    ```
+
+    Args:
+        cls: The SensitiveMeta class
+        data: Dictionary containing field values
+
+    Returns:
+        SensitiveMeta: The created instance
+
+    Raises:
+        ValueError: If neither center nor center_name is provided
+        ValueError: If center_name does not match any Center in database
+    """

     field_names = {
         f.name
@@ -484,15 +628,29 @@ def create_sensitive_meta_from_dict(
         )
         selected_data.pop("examination_date", None)

-    # Handle Center
+    # Handle Center - accept both center_name (string) and center (object)
+    from ..administration import Center
+
+    center = data.get("center")  # First try direct Center object
     center_name = data.get("center_name")
-    if not center_name:
-        raise ValueError("center_name is required in data dictionary.")
-    try:
-        center = Center.objects.get_by_natural_key(center_name)
+
+    if center is not None:
+        # Center object provided directly - validate it's a Center instance
+        if not isinstance(center, Center):
+            raise ValueError(f"'center' must be a Center instance, got {type(center)}")
         selected_data["center"] = center
-    except Center.DoesNotExist as exc:
-        raise ValueError(f"Center with name '{center_name}' does not exist.") from exc
+    elif center_name:
+        # center_name string provided - resolve to Center object
+        try:
+            center = Center.objects.get(name=center_name)
+            selected_data["center"] = center
+        except Center.DoesNotExist:
+            raise ValueError(f"Center with name '{center_name}' does not exist.")
+    else:
+        # Neither center nor center_name provided
+        raise ValueError(
+            "Either 'center' (Center object) or 'center_name' (string) is required in data dictionary."
+        )

     # Handle Names and Gender
     first_name = selected_data.get("patient_first_name") or DEFAULT_UNKNOWN_NAME
@@ -552,7 +710,50 @@ def create_sensitive_meta_from_dict(
 def update_sensitive_meta_from_dict(
     instance: "SensitiveMeta", data: Dict[str, Any]
 ) -> "SensitiveMeta":
-    """Logic to update a SensitiveMeta instance from a dictionary."""
+    """
+    Updates a SensitiveMeta instance from a dictionary of new values.
+
+    **Integration with two-phase save pattern:**
+    This function is typically called after initial SensitiveMeta creation when real
+    patient data becomes available (e.g., extracted from video OCR, PDF parsing, or
+    manual annotation).
+
+    **Example workflow:**
+    ```python
+    # Phase 1: Initial creation with defaults
+    sm = SensitiveMeta.create_from_dict({"center": center})
+    # → patient_first_name = "unknown", hash = sha256("unknown...")
+
+    # Phase 2: Update with extracted data
+    extracted = {
+        "patient_first_name": "Max",
+        "patient_last_name": "Mustermann",
+        "patient_dob": date(1985, 3, 15)
+    }
+    update_sensitive_meta_from_dict(sm, extracted)
+    # → Sets: sm.patient_first_name = "Max", sm.patient_last_name = "Mustermann"
+    # → Calls: sm.save()
+    # → Triggers: perform_save_logic() again
+    # → Result: Hash recalculated with real data, new pseudo-entities created
+    ```
+
+    **Key behaviors:**
+    - Updates instance attributes from provided dictionary
+    - Handles type conversions (date strings → date objects, gender strings → Gender objects)
+    - Tracks patient name changes to update name database
+    - Calls save() at the end, triggering full save logic including hash recalculation
+    - Default-setting in perform_save_logic() is skipped (fields already populated)
+
+    Args:
+        instance: The existing SensitiveMeta instance to update
+        data: Dictionary of field names and new values
+
+    Returns:
+        The updated SensitiveMeta instance
+
+    Raises:
+        Exception: If save fails or required conversions fail
+    """
     field_names = {
         f.name
         for f in instance._meta.get_fields()
@@ -564,17 +765,37 @@ def update_sensitive_meta_from_dict(
         k: v for k, v in data.items() if k in field_names and k not in excluded_fields
     }

-    # Handle potential Center update
+    # Handle potential Center update - accept both center_name (string) and center (object)
+    from ..administration import Center
+
+    center = data.get("center")  # First try direct Center object
     center_name = data.get("center_name")
-    if center_name:
+
+    if center is not None:
+        # Center object provided directly - validate and update
+        if isinstance(center, Center):
+            instance.center = center
+            logger.debug(f"Updated center from Center object: {center.name}")
+        else:
+            logger.warning(
+                f"Invalid center type {type(center)}, expected Center instance. Ignoring."
+            )
+        # Remove from selected_data to prevent override
+        selected_data.pop("center", None)
+    elif center_name:
+        # center_name string provided - resolve to Center object
         try:
-            center = Center.objects.get_by_natural_key(center_name)
-            instance.center = center # Update center directly
-        except Center.DoesNotExist as exc:
+            center_obj = Center.objects.get(name=center_name)
+            instance.center = center_obj
+            logger.debug(f"Updated center from center_name string: {center_name}")
+        except Center.DoesNotExist:
             logger.warning(
                 f"Center '{center_name}' not found during update. Keeping existing center."
             )
-            selected_data.pop("center", None) # Remove from dict if not found
+    else:
+        # Both are None/missing - remove 'center' from selected_data to preserve existing value
+        selected_data.pop("center", None)
+        # If both are None/missing, keep existing center (no update needed)

     # Set examiner names if provided, before calling save
     examiner_first_name = data.get("examiner_first_name")
@@ -657,6 +878,11 @@ def update_sensitive_meta_from_dict(
     # Update other attributes from selected_data
     patient_name_changed = False
     for k, v in selected_data.items():
+        # Skip None values to avoid overwriting existing data
+        if v is None:
+            logger.debug(f"Skipping field '{k}' during update because value is None")
+            continue
+
         # Avoid overwriting examiner names if they were just explicitly set
         if (
             k not in ["examiner_first_name", "examiner_last_name"]
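
Together with the two-phase save pattern documented above, the new `None` guard means partially extracted metadata can be applied without clobbering fields that are already populated. A short illustrative call (the names, center name, and import path are placeholders; `create_from_dict`/`update_from_dict` are the model-level entry points referenced in the docstrings above):

```python
from endoreg_db.models import SensitiveMeta  # import path assumed

# Phase 1: create with defaults ("unknown"/"unknown", random DOB).
sm = SensitiveMeta.create_from_dict({"center_name": "endoreg_db_demo"})

# Phase 2: apply whatever the OCR step managed to extract.
sm.update_from_dict({
    "patient_first_name": "Max",        # overwrites the "unknown" default
    "patient_last_name": "Mustermann",
    "patient_dob": None,                # skipped by the new None check, existing DOB kept
})
```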