endoreg-db 0.8.5.5__py3-none-any.whl → 0.8.5.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

@@ -547,7 +547,13 @@ class VideoFile(models.Model):
547
547
  self.sensitive_meta = SensitiveMeta.create_from_dict(default_data)
548
548
 
549
549
  # CRITICAL FIX: Delete RAW video file, not the processed (anonymized) one
550
- # After validation, only the anonymized video should remain
550
+ # CRITICAL: Update metadata BEFORE deleting raw video
551
+ # Metadata update may trigger frame extraction, which needs raw video
552
+ sensitive_meta = _update_text_metadata(
553
+ self, extracted_data_dict, overwrite=True
554
+ )
555
+
556
+ # After validation and metadata update, only the anonymized video should remain
551
557
  from .video_file_io import _get_raw_file_path
552
558
 
553
559
  raw_path = _get_raw_file_path(self)
@@ -563,11 +569,6 @@ class VideoFile(models.Model):
563
569
  else:
564
570
  logger.warning(f"Raw video file not found for deletion: {self.uuid}")
565
571
 
566
- # Update sensitive metadata with user annotations
567
- sensitive_meta = _update_text_metadata(
568
- self, extracted_data_dict, overwrite=True
569
- )
570
-
571
572
  if sensitive_meta:
572
573
  # Mark as processed after validation
573
574
  self.get_or_create_state().mark_sensitive_meta_processed(save=True)
@@ -43,7 +43,9 @@ class PdfImportService:
43
43
  - 'cropping': Advanced mode that crops sensitive regions to separate images
44
44
  """
45
45
 
46
- def __init__(self, allow_meta_overwrite: bool = False, processing_mode: str = "blackening"):
46
+ def __init__(
47
+ self, allow_meta_overwrite: bool = False, processing_mode: str = "blackening"
48
+ ):
47
49
  """
48
50
  Initialize the PDF import service.
49
51
 
@@ -59,7 +61,9 @@ class PdfImportService:
59
61
  # Validate and set processing mode
60
62
  valid_modes = ["blackening", "cropping"]
61
63
  if processing_mode not in valid_modes:
62
- raise ValueError(f"Invalid processing_mode '{processing_mode}'. Must be one of: {valid_modes}")
64
+ raise ValueError(
65
+ f"Invalid processing_mode '{processing_mode}'. Must be one of: {valid_modes}"
66
+ )
63
67
  self.processing_mode = processing_mode
64
68
 
65
69
  # Central PDF instance management
@@ -77,7 +81,9 @@ class PdfImportService:
77
81
  Returns:
78
82
  PdfImportService instance configured for blackening mode
79
83
  """
80
- return cls(allow_meta_overwrite=allow_meta_overwrite, processing_mode="blackening")
84
+ return cls(
85
+ allow_meta_overwrite=allow_meta_overwrite, processing_mode="blackening"
86
+ )
81
87
 
82
88
  @classmethod
83
89
  def with_cropping(cls, allow_meta_overwrite: bool = False) -> "PdfImportService":
@@ -90,7 +96,9 @@ class PdfImportService:
90
96
  Returns:
91
97
  PdfImportService instance configured for cropping mode
92
98
  """
93
- return cls(allow_meta_overwrite=allow_meta_overwrite, processing_mode="cropping")
99
+ return cls(
100
+ allow_meta_overwrite=allow_meta_overwrite, processing_mode="cropping"
101
+ )
94
102
 
95
103
  @contextmanager
96
104
  def _file_lock(self, path: Path):
@@ -115,10 +123,16 @@ class PdfImportService:
115
123
 
116
124
  if age is not None and age > STALE_LOCK_SECONDS:
117
125
  try:
118
- logger.warning("Stale lock detected for %s (age %.0fs). Reclaiming lock...", path, age)
126
+ logger.warning(
127
+ "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
128
+ path,
129
+ age,
130
+ )
119
131
  lock_path.unlink()
120
132
  except Exception as e:
121
- logger.warning("Failed to remove stale lock %s: %s", lock_path, e)
133
+ logger.warning(
134
+ "Failed to remove stale lock %s: %s", lock_path, e
135
+ )
122
136
  # retry acquire
123
137
  fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
124
138
  else:
@@ -233,12 +247,16 @@ class PdfImportService:
233
247
  try:
234
248
  mod = importlib.import_module("lx_anonymizer")
235
249
  ReportReader = getattr(mod, "ReportReader")
236
- logger.info("Imported lx_anonymizer.ReportReader via LX_ANONYMIZER_PATH")
250
+ logger.info(
251
+ "Imported lx_anonymizer.ReportReader via LX_ANONYMIZER_PATH"
252
+ )
237
253
  self._report_reader_available = True
238
254
  self._report_reader_class = ReportReader
239
255
  return True, ReportReader
240
256
  except Exception as e:
241
- logger.warning("Failed importing lx_anonymizer via LX_ANONYMIZER_PATH: %s", e)
257
+ logger.warning(
258
+ "Failed importing lx_anonymizer via LX_ANONYMIZER_PATH: %s", e
259
+ )
242
260
  finally:
243
261
  # Keep path for future imports if it worked; otherwise remove.
244
262
  if "ReportReader" not in locals() and extra in sys.path:
@@ -259,11 +277,15 @@ class PdfImportService:
259
277
  """
260
278
  pdf_file = pdf_instance or self.current_pdf
261
279
  if not pdf_file:
262
- logger.warning("No PDF instance available for ensuring default patient data")
280
+ logger.warning(
281
+ "No PDF instance available for ensuring default patient data"
282
+ )
263
283
  return
264
284
 
265
285
  if not pdf_file.sensitive_meta:
266
- logger.info(f"No SensitiveMeta found for PDF {pdf_file.pdf_hash}, creating default")
286
+ logger.info(
287
+ f"No SensitiveMeta found for PDF {pdf_file.pdf_hash}, creating default"
288
+ )
267
289
 
268
290
  # Create default SensitiveMeta with placeholder data
269
291
  default_data = {
@@ -271,16 +293,22 @@ class PdfImportService:
271
293
  "patient_last_name": "Unknown",
272
294
  "patient_dob": date(1990, 1, 1), # Default DOB
273
295
  "examination_date": date.today(),
274
- "center_name": pdf_file.center.name if pdf_file.center else "university_hospital_wuerzburg",
296
+ "center_name": pdf_file.center.name
297
+ if pdf_file.center
298
+ else "university_hospital_wuerzburg",
275
299
  }
276
300
 
277
301
  try:
278
302
  sensitive_meta = SensitiveMeta.create_from_dict(default_data)
279
303
  pdf_file.sensitive_meta = sensitive_meta
280
304
  pdf_file.save(update_fields=["sensitive_meta"])
281
- logger.info(f"Created default SensitiveMeta for PDF {pdf_file.pdf_hash}")
305
+ logger.info(
306
+ f"Created default SensitiveMeta for PDF {pdf_file.pdf_hash}"
307
+ )
282
308
  except Exception as e:
283
- logger.error(f"Failed to create default SensitiveMeta for PDF {pdf_file.pdf_hash}: {e}")
309
+ logger.error(
310
+ f"Failed to create default SensitiveMeta for PDF {pdf_file.pdf_hash}: {e}"
311
+ )
284
312
 
285
313
  def import_and_anonymize(
286
314
  self,
@@ -311,7 +339,9 @@ class PdfImportService:
311
339
  """
312
340
  try:
313
341
  # Initialize processing context
314
- self._initialize_processing_context(file_path, center_name, delete_source, retry)
342
+ self._initialize_processing_context(
343
+ file_path, center_name, delete_source, retry
344
+ )
315
345
 
316
346
  # Step 1: Validate and prepare file
317
347
  self._validate_and_prepare_file()
@@ -321,7 +351,9 @@ class PdfImportService:
321
351
 
322
352
  # Early return check - if no PDF instance was created, return None
323
353
  if not self.current_pdf:
324
- logger.warning(f"No PDF instance created for {file_path}, returning None")
354
+ logger.warning(
355
+ f"No PDF instance created for {file_path}, returning None"
356
+ )
325
357
  return None
326
358
 
327
359
  # Step 3: Setup processing environment
@@ -353,7 +385,13 @@ class PdfImportService:
353
385
  # Always cleanup context
354
386
  self._cleanup_processing_context()
355
387
 
356
- def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str, delete_source: bool, retry: bool):
388
+ def _initialize_processing_context(
389
+ self,
390
+ file_path: Union[Path, str],
391
+ center_name: str,
392
+ delete_source: bool,
393
+ retry: bool,
394
+ ):
357
395
  """Initialize the processing context for the current PDF."""
358
396
  self.processing_context = {
359
397
  "file_path": Path(file_path),
@@ -370,7 +408,9 @@ class PdfImportService:
370
408
 
371
409
  # Check if already processed (only during current session to prevent race conditions)
372
410
  if str(file_path) in self.processed_files:
373
- logger.info(f"File {file_path} already being processed in current session, skipping")
411
+ logger.info(
412
+ f"File {file_path} already being processed in current session, skipping"
413
+ )
374
414
  raise ValueError("File already being processed")
375
415
 
376
416
  logger.info(f"Starting import and processing for: {file_path}")
@@ -406,7 +446,9 @@ class PdfImportService:
406
446
  if existing:
407
447
  logger.info(f"Found existing RawPdfFile {existing.pdf_hash}")
408
448
  if existing.text:
409
- logger.info(f"Existing PDF {existing.pdf_hash} already processed - returning")
449
+ logger.info(
450
+ f"Existing PDF {existing.pdf_hash} already processed - returning"
451
+ )
410
452
  self.current_pdf = existing
411
453
  return
412
454
  else:
@@ -428,11 +470,15 @@ class PdfImportService:
428
470
  else:
429
471
  # Retrieve existing for retry
430
472
  self.current_pdf = RawPdfFile.objects.get(pdf_hash=file_hash)
431
- logger.info(f"Retrying import for existing RawPdfFile {self.current_pdf.pdf_hash}")
473
+ logger.info(
474
+ f"Retrying import for existing RawPdfFile {self.current_pdf.pdf_hash}"
475
+ )
432
476
 
433
477
  # Check if retry is actually needed
434
478
  if self.current_pdf.text:
435
- logger.info(f"Existing PDF {self.current_pdf.pdf_hash} already processed during retry - returning")
479
+ logger.info(
480
+ f"Existing PDF {self.current_pdf.pdf_hash} already processed during retry - returning"
481
+ )
436
482
  return
437
483
 
438
484
  if not self.current_pdf:
@@ -459,7 +505,9 @@ class PdfImportService:
459
505
  self.processing_context["file_path"] = self.current_pdf.file.path
460
506
  self.processing_context["sensitive_copy_created"] = True
461
507
  try:
462
- self.processing_context["sensitive_file_path"] = Path(self.current_pdf.file.path)
508
+ self.processing_context["sensitive_file_path"] = Path(
509
+ self.current_pdf.file.path
510
+ )
463
511
  except Exception:
464
512
  self.processing_context["sensitive_file_path"] = None
465
513
 
@@ -490,10 +538,16 @@ class PdfImportService:
490
538
  return
491
539
 
492
540
  try:
493
- logger.info(f"Starting text extraction and metadata processing with ReportReader (mode: {self.processing_mode})...")
541
+ logger.info(
542
+ f"Starting text extraction and metadata processing with ReportReader (mode: {self.processing_mode})..."
543
+ )
494
544
 
495
545
  # Initialize ReportReader
496
- report_reader = ReportReader(report_root_path=str(path_utils.STORAGE_DIR), locale="de_DE", text_date_format="%d.%m.%Y")
546
+ report_reader = ReportReader(
547
+ report_root_path=str(path_utils.STORAGE_DIR),
548
+ locale="de_DE",
549
+ text_date_format="%d.%m.%Y",
550
+ )
497
551
 
498
552
  if self.processing_mode == "cropping":
499
553
  # Use advanced cropping method (existing implementation)
@@ -519,8 +573,12 @@ class PdfImportService:
519
573
  anonymized_output_path = anonymized_dir / f"{pdf_hash}_anonymized.pdf"
520
574
 
521
575
  # Process with enhanced process_report method (returns 4-tuple now)
522
- original_text, anonymized_text, extracted_metadata, anonymized_pdf_path = report_reader.process_report(
523
- pdf_path=self.processing_context["file_path"], create_anonymized_pdf=True, anonymized_pdf_output_path=str(anonymized_output_path)
576
+ original_text, anonymized_text, extracted_metadata, anonymized_pdf_path = (
577
+ report_reader.process_report(
578
+ pdf_path=self.processing_context["file_path"],
579
+ create_anonymized_pdf=True,
580
+ anonymized_pdf_output_path=str(anonymized_output_path),
581
+ )
524
582
  )
525
583
 
526
584
  # Store results in context
@@ -560,7 +618,13 @@ class PdfImportService:
560
618
  anonymized_dir.mkdir(parents=True, exist_ok=True)
561
619
 
562
620
  # Process with cropping (returns 5-tuple)
563
- original_text, anonymized_text, extracted_metadata, cropped_regions, anonymized_pdf_path = report_reader.process_report_with_cropping(
621
+ (
622
+ original_text,
623
+ anonymized_text,
624
+ extracted_metadata,
625
+ cropped_regions,
626
+ anonymized_pdf_path,
627
+ ) = report_reader.process_report_with_cropping(
564
628
  pdf_path=self.processing_context["file_path"],
565
629
  crop_sensitive_regions=True,
566
630
  crop_output_dir=str(crops_dir),
@@ -661,7 +725,11 @@ class PdfImportService:
661
725
  new_value = raw_value
662
726
 
663
727
  # Configurable overwrite policy
664
- should_overwrite = self.allow_meta_overwrite or not old_value or old_value in ["Patient", "Unknown"]
728
+ should_overwrite = (
729
+ self.allow_meta_overwrite
730
+ or not old_value
731
+ or old_value in ["Patient", "Unknown"]
732
+ )
665
733
  if new_value and should_overwrite:
666
734
  setattr(sm, sm_field, new_value)
667
735
  updated_fields.append(sm_field)
@@ -676,7 +744,11 @@ class PdfImportService:
676
744
  if isinstance(raw_value, str):
677
745
  # Skip if the value is just the field name itself
678
746
  if raw_value == meta_key:
679
- logger.warning("Skipping date field %s - got field name '%s' instead of actual date", sm_field, raw_value)
747
+ logger.warning(
748
+ "Skipping date field %s - got field name '%s' instead of actual date",
749
+ sm_field,
750
+ raw_value,
751
+ )
680
752
  return None
681
753
 
682
754
  # Try common date formats
@@ -687,7 +759,9 @@ class PdfImportService:
687
759
  except ValueError:
688
760
  continue
689
761
 
690
- logger.warning("Could not parse date '%s' for field %s", raw_value, sm_field)
762
+ logger.warning(
763
+ "Could not parse date '%s' for field %s", raw_value, sm_field
764
+ )
691
765
  return None
692
766
 
693
767
  elif hasattr(raw_value, "date"):
@@ -721,7 +795,10 @@ class PdfImportService:
721
795
 
722
796
  anonymized_path = Path(anonymized_pdf_path)
723
797
  if not anonymized_path.exists():
724
- logger.warning("Anonymized PDF path returned but file does not exist: %s", anonymized_path)
798
+ logger.warning(
799
+ "Anonymized PDF path returned but file does not exist: %s",
800
+ anonymized_path,
801
+ )
725
802
  return
726
803
 
727
804
  logger.info("Anonymized PDF created by ReportReader at: %s", anonymized_path)
@@ -746,7 +823,9 @@ class PdfImportService:
746
823
  # If your model has a field for this, persist there; otherwise we just log.
747
824
  cropped_regions = self.processing_context.get("cropped_regions")
748
825
  if cropped_regions:
749
- logger.debug("Cropped regions recorded (%d regions).", len(cropped_regions))
826
+ logger.debug(
827
+ "Cropped regions recorded (%d regions).", len(cropped_regions)
828
+ )
750
829
 
751
830
  # Save model changes
752
831
  update_fields = ["anonymized_file"]
@@ -759,7 +838,10 @@ class PdfImportService:
759
838
  if state and not state.anonymized:
760
839
  state.mark_anonymized(save=True)
761
840
 
762
- logger.info("Updated anonymized_file reference to: %s", self.current_pdf.anonymized_file.name)
841
+ logger.info(
842
+ "Updated anonymized_file reference to: %s",
843
+ self.current_pdf.anonymized_file.name,
844
+ )
763
845
 
764
846
  except Exception as e:
765
847
  logger.warning("Could not set anonymized file reference: %s", e)
@@ -790,7 +872,9 @@ class PdfImportService:
790
872
  def _mark_processing_incomplete(self, reason: str):
791
873
  """Mark processing as incomplete with reason."""
792
874
  if not self.current_pdf:
793
- logger.warning(f"Cannot mark processing incomplete - no PDF instance available. Reason: {reason}")
875
+ logger.warning(
876
+ f"Cannot mark processing incomplete - no PDF instance available. Reason: {reason}"
877
+ )
794
878
  return
795
879
 
796
880
  try:
@@ -809,22 +893,44 @@ class PdfImportService:
809
893
  logger.warning(f"Failed to mark processing incomplete: {e}")
810
894
 
811
895
  def _retry_existing_pdf(self, existing_pdf):
812
- """Retry processing for existing PDF."""
896
+ """
897
+ Retry processing for existing PDF.
898
+
899
+ Uses get_raw_file_path() to find the original raw file instead of
900
+ relying on the file field which may point to a deleted sensitive file.
901
+ """
813
902
  try:
903
+ # ✅ FIX: Use get_raw_file_path() to find original file
904
+ raw_file_path = existing_pdf.get_raw_file_path()
905
+
906
+ if not raw_file_path or not raw_file_path.exists():
907
+ logger.error(
908
+ f"Cannot retry PDF {existing_pdf.pdf_hash}: Raw file not found. "
909
+ f"Please re-upload the original PDF file."
910
+ )
911
+ self.current_pdf = existing_pdf
912
+ return existing_pdf
913
+
914
+ logger.info(f"Found raw file for retry at: {raw_file_path}")
915
+
814
916
  # Remove from processed files to allow retry
815
- file_path_str = str(existing_pdf.file.path) if existing_pdf.file else None
816
- if file_path_str and file_path_str in self.processed_files:
917
+ file_path_str = str(raw_file_path)
918
+ if file_path_str in self.processed_files:
817
919
  self.processed_files.remove(file_path_str)
818
920
  logger.debug(f"Removed {file_path_str} from processed files for retry")
819
921
 
820
922
  return self.import_and_anonymize(
821
- file_path=existing_pdf.file.path,
822
- center_name=existing_pdf.center.name if existing_pdf.center else "unknown_center",
823
- delete_source=False,
923
+ file_path=raw_file_path, # ✅ Use raw file path, not sensitive path
924
+ center_name=existing_pdf.center.name
925
+ if existing_pdf.center
926
+ else "unknown_center",
927
+ delete_source=False, # Never delete during retry
824
928
  retry=True,
825
929
  )
826
930
  except Exception as e:
827
- logger.error(f"Failed to re-import existing PDF {existing_pdf.pdf_hash}: {e}")
931
+ logger.error(
932
+ f"Failed to re-import existing PDF {existing_pdf.pdf_hash}: {e}"
933
+ )
828
934
  self.current_pdf = existing_pdf
829
935
  return existing_pdf
830
936
 
@@ -852,26 +958,51 @@ class PdfImportService:
852
958
  if file_field and getattr(file_field, "name", None):
853
959
  storage_name = file_field.name
854
960
  file_field.delete(save=False)
855
- logger.debug("Deleted sensitive copy %s during error cleanup", storage_name)
961
+ logger.debug(
962
+ "Deleted sensitive copy %s during error cleanup",
963
+ storage_name,
964
+ )
856
965
  except Exception as cleanup_exc:
857
- logger.warning("Failed to remove sensitive copy during error cleanup: %s", cleanup_exc)
966
+ logger.warning(
967
+ "Failed to remove sensitive copy during error cleanup: %s",
968
+ cleanup_exc,
969
+ )
858
970
 
859
971
  # Always clean up processed files set to prevent blocks
860
972
  file_path = self.processing_context.get("file_path")
861
973
  if file_path and str(file_path) in self.processed_files:
862
974
  self.processed_files.remove(str(file_path))
863
- logger.debug(f"Removed {file_path} from processed files during error cleanup")
975
+ logger.debug(
976
+ f"Removed {file_path} from processed files during error cleanup"
977
+ )
864
978
 
865
979
  try:
866
980
  original_path = self.processing_context.get("original_file_path")
867
- logger.debug("PDF cleanup original path: %s (%s)", original_path, type(original_path))
868
- raw_dir = original_path.parent if isinstance(original_path, Path) else None
869
- if isinstance(original_path, Path) and original_path.exists() and not self.processing_context.get("sensitive_copy_created"):
981
+ logger.debug(
982
+ "PDF cleanup original path: %s (%s)",
983
+ original_path,
984
+ type(original_path),
985
+ )
986
+ raw_dir = (
987
+ original_path.parent if isinstance(original_path, Path) else None
988
+ )
989
+ if (
990
+ isinstance(original_path, Path)
991
+ and original_path.exists()
992
+ and not self.processing_context.get("sensitive_copy_created")
993
+ ):
870
994
  try:
871
995
  original_path.unlink()
872
- logger.info("Removed original file %s during error cleanup", original_path)
996
+ logger.info(
997
+ "Removed original file %s during error cleanup",
998
+ original_path,
999
+ )
873
1000
  except Exception as remove_exc:
874
- logger.warning("Could not remove original file %s during error cleanup: %s", original_path, remove_exc)
1001
+ logger.warning(
1002
+ "Could not remove original file %s during error cleanup: %s",
1003
+ original_path,
1004
+ remove_exc,
1005
+ )
875
1006
  pdf_dir = self._get_pdf_dir()
876
1007
  if not pdf_dir and raw_dir:
877
1008
  base_dir = raw_dir.parent
@@ -888,7 +1019,12 @@ class PdfImportService:
888
1019
 
889
1020
  # Remove empty PDF subdirectories that might have been created during setup
890
1021
  if pdf_dir and pdf_dir.exists():
891
- for subdir_name in ("sensitive", "cropped_regions", "anonymized", "_processing"):
1022
+ for subdir_name in (
1023
+ "sensitive",
1024
+ "cropped_regions",
1025
+ "anonymized",
1026
+ "_processing",
1027
+ ):
892
1028
  subdir_path = pdf_dir / subdir_name
893
1029
  if subdir_path.exists() and subdir_path.is_dir():
894
1030
  try:
@@ -896,22 +1032,49 @@ class PdfImportService:
896
1032
  except StopIteration:
897
1033
  try:
898
1034
  subdir_path.rmdir()
899
- logger.debug("Removed empty directory %s during error cleanup", subdir_path)
1035
+ logger.debug(
1036
+ "Removed empty directory %s during error cleanup",
1037
+ subdir_path,
1038
+ )
900
1039
  except OSError as rm_err:
901
- logger.debug("Could not remove directory %s: %s", subdir_path, rm_err)
1040
+ logger.debug(
1041
+ "Could not remove directory %s: %s",
1042
+ subdir_path,
1043
+ rm_err,
1044
+ )
902
1045
  except Exception as iter_err:
903
- logger.debug("Could not inspect directory %s: %s", subdir_path, iter_err)
904
-
905
- raw_count = len(list(raw_dir.glob("*"))) if raw_dir and raw_dir.exists() else None
906
- pdf_count = len(list(pdf_dir.glob("*"))) if pdf_dir and pdf_dir.exists() else None
1046
+ logger.debug(
1047
+ "Could not inspect directory %s: %s",
1048
+ subdir_path,
1049
+ iter_err,
1050
+ )
1051
+
1052
+ raw_count = (
1053
+ len(list(raw_dir.glob("*")))
1054
+ if raw_dir and raw_dir.exists()
1055
+ else None
1056
+ )
1057
+ pdf_count = (
1058
+ len(list(pdf_dir.glob("*")))
1059
+ if pdf_dir and pdf_dir.exists()
1060
+ else None
1061
+ )
907
1062
 
908
1063
  sensitive_path = self.processing_context.get("sensitive_file_path")
909
1064
  if sensitive_path:
910
1065
  sensitive_parent = Path(sensitive_path).parent
911
- sensitive_count = len(list(sensitive_parent.glob("*"))) if sensitive_parent.exists() else None
1066
+ sensitive_count = (
1067
+ len(list(sensitive_parent.glob("*")))
1068
+ if sensitive_parent.exists()
1069
+ else None
1070
+ )
912
1071
  else:
913
1072
  sensitive_dir = pdf_dir / "sensitive" if pdf_dir else None
914
- sensitive_count = len(list(sensitive_dir.glob("*"))) if sensitive_dir and sensitive_dir.exists() else None
1073
+ sensitive_count = (
1074
+ len(list(sensitive_dir.glob("*")))
1075
+ if sensitive_dir and sensitive_dir.exists()
1076
+ else None
1077
+ )
915
1078
 
916
1079
  logger.info(
917
1080
  "PDF import error cleanup counts - raw: %s, pdf: %s, sensitive: %s",
@@ -944,7 +1107,9 @@ class PdfImportService:
944
1107
  self.current_pdf = None
945
1108
  self.processing_context = {}
946
1109
 
947
- def import_simple(self, file_path: Union[Path, str], center_name: str, delete_source: bool = False) -> "RawPdfFile":
1110
+ def import_simple(
1111
+ self, file_path: Union[Path, str], center_name: str, delete_source: bool = False
1112
+ ) -> "RawPdfFile":
948
1113
  """
949
1114
  Simple PDF import without text processing or anonymization.
950
1115
  Uses centralized PDF instance management pattern.
@@ -959,7 +1124,9 @@ class PdfImportService:
959
1124
  """
960
1125
  try:
961
1126
  # Initialize simple processing context
962
- self._initialize_processing_context(file_path, center_name, delete_source, False)
1127
+ self._initialize_processing_context(
1128
+ file_path, center_name, delete_source, False
1129
+ )
963
1130
 
964
1131
  # Validate file
965
1132
  self._validate_and_prepare_file()
@@ -991,7 +1158,10 @@ class PdfImportService:
991
1158
  with transaction.atomic():
992
1159
  self.current_pdf.save()
993
1160
 
994
- logger.info("Simple import completed for RawPdfFile hash: %s", self.current_pdf.pdf_hash)
1161
+ logger.info(
1162
+ "Simple import completed for RawPdfFile hash: %s",
1163
+ self.current_pdf.pdf_hash,
1164
+ )
995
1165
  return self.current_pdf
996
1166
 
997
1167
  except Exception as e:
@@ -1001,7 +1171,9 @@ class PdfImportService:
1001
1171
  finally:
1002
1172
  self._cleanup_processing_context()
1003
1173
 
1004
- def check_storage_capacity(self, file_path: Union[Path, str], storage_root, min_required_space) -> None:
1174
+ def check_storage_capacity(
1175
+ self, file_path: Union[Path, str], storage_root, min_required_space
1176
+ ) -> None:
1005
1177
  """
1006
1178
  Check if there is sufficient storage capacity for the PDF file.
1007
1179
 
@@ -1031,12 +1203,18 @@ class PdfImportService:
1031
1203
 
1032
1204
  # Check if there is enough space
1033
1205
  if file_size > free:
1034
- raise InsufficientStorageError(f"Not enough space to store PDF file: {file_path}")
1035
- logger.info(f"Storage check passed for {file_path}: {file_size} bytes, {free} bytes available")
1206
+ raise InsufficientStorageError(
1207
+ f"Not enough space to store PDF file: {file_path}"
1208
+ )
1209
+ logger.info(
1210
+ f"Storage check passed for {file_path}: {file_size} bytes, {free} bytes available"
1211
+ )
1036
1212
 
1037
1213
  return True
1038
1214
 
1039
- def create_sensitive_file(self, pdf_instance: "RawPdfFile" = None, file_path: Union[Path, str] = None) -> None:
1215
+ def create_sensitive_file(
1216
+ self, pdf_instance: "RawPdfFile" = None, file_path: Union[Path, str] = None
1217
+ ) -> None:
1040
1218
  """
1041
1219
  Create a copy of the PDF file in the sensitive directory and update the file reference.
1042
1220
  Delete the source path to avoid duplicates.
@@ -1045,7 +1223,9 @@ class PdfImportService:
1045
1223
  Ensures the FileField points to the file under STORAGE_DIR/pdfs/sensitive and never back to raw_pdfs.
1046
1224
  """
1047
1225
  pdf_file = pdf_instance or self.current_pdf
1048
- source_path = Path(file_path) if file_path else self.processing_context.get("file_path")
1226
+ source_path = (
1227
+ Path(file_path) if file_path else self.processing_context.get("file_path")
1228
+ )
1049
1229
 
1050
1230
  if not pdf_file:
1051
1231
  raise ValueError("No PDF instance available for creating sensitive file")
@@ -1068,14 +1248,20 @@ class PdfImportService:
1068
1248
  try:
1069
1249
  target.unlink()
1070
1250
  except Exception as e:
1071
- logger.warning("Could not remove existing sensitive target %s: %s", target, e)
1251
+ logger.warning(
1252
+ "Could not remove existing sensitive target %s: %s",
1253
+ target,
1254
+ e,
1255
+ )
1072
1256
  shutil.move(str(source_path), str(target))
1073
1257
  logger.info(f"Moved PDF to sensitive directory: {target}")
1074
1258
 
1075
1259
  # Update FileField to reference the file under STORAGE_DIR
1076
1260
  # We avoid re-saving file content (the file is already at target); set .name relative to STORAGE_DIR
1077
1261
  try:
1078
- relative_name = str(target.relative_to(path_utils.STORAGE_DIR)) # Point Django FileField to sensitive storage
1262
+ relative_name = str(
1263
+ target.relative_to(path_utils.STORAGE_DIR)
1264
+ ) # Point Django FileField to sensitive storage
1079
1265
  except ValueError:
1080
1266
  # Fallback: if target is not under STORAGE_DIR, store absolute path (not ideal)
1081
1267
  relative_name = str(target)
@@ -1084,9 +1270,15 @@ class PdfImportService:
1084
1270
  if getattr(pdf_file.file, "name", None) != relative_name:
1085
1271
  pdf_file.file.name = relative_name
1086
1272
  pdf_file.save(update_fields=["file"])
1087
- logger.info("Updated PDF FileField reference to sensitive path: %s", pdf_file.file.path)
1273
+ logger.info(
1274
+ "Updated PDF FileField reference to sensitive path: %s",
1275
+ pdf_file.file.path,
1276
+ )
1088
1277
  else:
1089
- logger.debug("PDF FileField already points to sensitive path: %s", pdf_file.file.path)
1278
+ logger.debug(
1279
+ "PDF FileField already points to sensitive path: %s",
1280
+ pdf_file.file.path,
1281
+ )
1090
1282
 
1091
1283
  # Best-effort: if original source still exists (e.g., copy), remove it to avoid re-triggers
1092
1284
  try:
@@ -1097,10 +1289,17 @@ class PdfImportService:
1097
1289
  logger.warning(f"Could not delete original PDF file {source_path}: {e}")
1098
1290
 
1099
1291
  except Exception as e:
1100
- logger.warning(f"Could not create sensitive file copy for {pdf_file.pdf_hash}: {e}", exc_info=True)
1292
+ logger.warning(
1293
+ f"Could not create sensitive file copy for {pdf_file.pdf_hash}: {e}",
1294
+ exc_info=True,
1295
+ )
1101
1296
 
1102
1297
  def archive_or_quarantine_file(
1103
- self, pdf_instance: "RawPdfFile" = None, source_file_path: Union[Path, str] = None, quarantine_reason: str = None, is_pdf_problematic: bool = None
1298
+ self,
1299
+ pdf_instance: "RawPdfFile" = None,
1300
+ source_file_path: Union[Path, str] = None,
1301
+ quarantine_reason: str = None,
1302
+ is_pdf_problematic: bool = None,
1104
1303
  ) -> bool:
1105
1304
  """
1106
1305
  Archive or quarantine file based on the state of the PDF processing.
@@ -1116,8 +1315,14 @@ class PdfImportService:
1116
1315
  bool: True if file was quarantined, False if archived successfully
1117
1316
  """
1118
1317
  pdf_file = pdf_instance or self.current_pdf
1119
- file_path = Path(source_file_path) if source_file_path else self.processing_context.get("file_path")
1120
- quarantine_reason = quarantine_reason or self.processing_context.get("error_reason")
1318
+ file_path = (
1319
+ Path(source_file_path)
1320
+ if source_file_path
1321
+ else self.processing_context.get("file_path")
1322
+ )
1323
+ quarantine_reason = quarantine_reason or self.processing_context.get(
1324
+ "error_reason"
1325
+ )
1121
1326
 
1122
1327
  if not pdf_file:
1123
1328
  raise ValueError("No PDF instance available for archiving/quarantine")
@@ -1125,24 +1330,34 @@ class PdfImportService:
1125
1330
  raise ValueError("No file path available for archiving/quarantine")
1126
1331
 
1127
1332
  # Determine if the PDF is problematic
1128
- pdf_problematic = is_pdf_problematic if is_pdf_problematic is not None else pdf_file.is_problematic
1333
+ pdf_problematic = (
1334
+ is_pdf_problematic
1335
+ if is_pdf_problematic is not None
1336
+ else pdf_file.is_problematic
1337
+ )
1129
1338
 
1130
1339
  if pdf_problematic:
1131
1340
  # Quarantine the file
1132
- logger.warning(f"Quarantining problematic PDF: {pdf_file.pdf_hash}, reason: {quarantine_reason}")
1341
+ logger.warning(
1342
+ f"Quarantining problematic PDF: {pdf_file.pdf_hash}, reason: {quarantine_reason}"
1343
+ )
1133
1344
  quarantine_dir = path_utils.PDF_DIR / "quarantine"
1134
1345
  os.makedirs(quarantine_dir, exist_ok=True)
1135
1346
 
1136
1347
  quarantine_path = quarantine_dir / f"{pdf_file.pdf_hash}.pdf"
1137
1348
  try:
1138
1349
  shutil.move(file_path, quarantine_path)
1139
- pdf_file.quarantine_reason = quarantine_reason or "File processing failed"
1350
+ pdf_file.quarantine_reason = (
1351
+ quarantine_reason or "File processing failed"
1352
+ )
1140
1353
  pdf_file.save(update_fields=["quarantine_reason"])
1141
1354
  logger.info(f"Moved problematic PDF to quarantine: {quarantine_path}")
1142
1355
  return True
1143
1356
  except Exception as e:
1144
1357
  logger.error(f"Failed to quarantine PDF {pdf_file.pdf_hash}: {e}")
1145
- return True # Still consider as quarantined to prevent further processing
1358
+ return (
1359
+ True # Still consider as quarantined to prevent further processing
1360
+ )
1146
1361
  else:
1147
1362
  # Archive the file normally
1148
1363
  logger.info(f"Archiving successfully processed PDF: {pdf_file.pdf_hash}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: endoreg-db
3
- Version: 0.8.5.5
3
+ Version: 0.8.5.7
4
4
  Summary: EndoReg Db Django App
5
5
  Project-URL: Homepage, https://info.coloreg.de
6
6
  Project-URL: Repository, https://github.com/wg-lux/endoreg-db
@@ -390,7 +390,7 @@ endoreg_db/models/media/video/create_from_file.py,sha256=3n4bbzFteEOFDUuEikP0x-S
390
390
  endoreg_db/models/media/video/pipe_1.py,sha256=ljO3vO2mqqTXLZsKjzMTC6-sW4JRWMVRfJcK0n5CjKg,9740
391
391
  endoreg_db/models/media/video/pipe_2.py,sha256=DnMxW0uOqSsf7-0n9Rlvn7u89U4Jpkv7n6hFpQfUjkQ,4964
392
392
  endoreg_db/models/media/video/refactor_plan.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
393
- endoreg_db/models/media/video/video_file.py,sha256=TULxjYp2vY-uQkSIAACkDY_cXIyApqFM9iQeojKQXuQ,30179
393
+ endoreg_db/models/media/video/video_file.py,sha256=VFRgP1QRtuxzE9l74tCW7Kz5leHdv5d5i3aqtH3Y1vA,30281
394
394
  endoreg_db/models/media/video/video_file_ai.py,sha256=3ABea52FOF1qlrlxHdYhz_M3Kmqfzqtgq7M0prl-FAo,18819
395
395
  endoreg_db/models/media/video/video_file_anonymize.py,sha256=pet1UfSsbSHJJZxq6gDPifAfBWpGyEpD1jEQuSQi0Gg,16027
396
396
  endoreg_db/models/media/video/video_file_frames.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -598,7 +598,7 @@ endoreg_db/services/examination_evaluation.py,sha256=jx9IL2PIoBzjiITzs00c1XucE7A
598
598
  endoreg_db/services/finding_description_service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
599
599
  endoreg_db/services/lookup_service.py,sha256=A2t07-qlQhFEeNvOhez0GU0sxi7mnN0MIlhYzxj4W1U,10581
600
600
  endoreg_db/services/lookup_store.py,sha256=8sB2HmJQrnzq5Vfqt-UdaJLHYMRZCxnui9BCCXscnJE,4856
601
- endoreg_db/services/pdf_import.py,sha256=L5DpeyfSPDsFdra8sCsaD4hdPrAe_sohXnjZZ8S4mgQ,49791
601
+ endoreg_db/services/pdf_import.py,sha256=iipdALOF2bxtM_IOQXzPOu75nRvYVzplarzkajLoS8E,54274
602
602
  endoreg_db/services/polling_coordinator.py,sha256=alnPB-kdMyxbYaxQN9fki9dKrwmAsY3s68bUHWDSNeI,10662
603
603
  endoreg_db/services/pseudonym_service.py,sha256=CJhbtRa6K6SPbphgCZgEMi8AFQtB18CUoBDttFnxEoM,3126
604
604
  endoreg_db/services/requirements_object.py,sha256=290zf8AEbVtCoHhW4Jr7_ud-RvrqYmb1Nz9UBHtTnc0,6164
@@ -789,7 +789,7 @@ endoreg_db/views/video/video_meta.py,sha256=C1wBMTtQb_yzEUrhFGAy2UHEWMk_CbU75WXX
789
789
  endoreg_db/views/video/video_processing_history.py,sha256=mhFuS8RG5GV8E-lTtuD0qrq-bIpnUFp8vy9aERfC-J8,770
790
790
  endoreg_db/views/video/video_remove_frames.py,sha256=2FmvNrSPM0fUXiBxINN6vBUUDCqDlBkNcGR3WsLDgKo,1696
791
791
  endoreg_db/views/video/video_stream.py,sha256=kLyuf0ORTmsLeYUQkTQ6iRYqlIQozWhMMR3Lhfe_trk,12148
792
- endoreg_db-0.8.5.5.dist-info/METADATA,sha256=8JT-qB-gQdbZy2OD0K8bzTa3ngHy6nlgxj7XQC28DOU,14719
793
- endoreg_db-0.8.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
794
- endoreg_db-0.8.5.5.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
795
- endoreg_db-0.8.5.5.dist-info/RECORD,,
792
+ endoreg_db-0.8.5.7.dist-info/METADATA,sha256=3nTdLbv6Vj_I9YWKWlBG7SeQpLu5kQdiKaTQ2mcVIXU,14719
793
+ endoreg_db-0.8.5.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
794
+ endoreg_db-0.8.5.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
795
+ endoreg_db-0.8.5.7.dist-info/RECORD,,