endoreg-db 0.8.2__py3-none-any.whl → 0.8.2.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

File without changes
@@ -0,0 +1,30 @@
1
+ from django.db import migrations, models
2
+
3
+
4
+ def populate_display_name(apps, schema_editor):
5
+ Center = apps.get_model('endoreg_db', 'Center')
6
+ for center in Center.objects.all():
7
+ if not center.display_name:
8
+ center.display_name = center.name
9
+ center.save(update_fields=['display_name'])
10
+
11
+
12
+ def reset_display_name(apps, schema_editor):
13
+ Center = apps.get_model('endoreg_db', 'Center')
14
+ Center.objects.update(display_name='')
15
+
16
+
17
+ class Migration(migrations.Migration):
18
+
19
+ dependencies = [
20
+ ('endoreg_db', '0002_add_video_correction_models'),
21
+ ]
22
+
23
+ operations = [
24
+ migrations.AddField(
25
+ model_name='center',
26
+ name='display_name',
27
+ field=models.CharField(blank=True, default='', max_length=255),
28
+ ),
29
+ migrations.RunPython(populate_display_name, reset_display_name),
30
+ ]
@@ -19,6 +19,7 @@ class Center(models.Model):
19
19
 
20
20
  # import_id = models.IntegerField(primary_key=True)
21
21
  name = models.CharField(max_length=255)
22
+ display_name = models.CharField(max_length=255, blank=True, default="")
22
23
 
23
24
  first_names = models.ManyToManyField(
24
25
  to="FirstName",
@@ -45,8 +46,13 @@ class Center(models.Model):
45
46
  def natural_key(self) -> tuple[str]:
46
47
  return (self.name,)
47
48
 
49
+ def save(self, *args, **kwargs):
50
+ if not self.display_name:
51
+ self.display_name = self.name
52
+ super().save(*args, **kwargs)
53
+
48
54
  def __str__(self) -> str:
49
- return str(object=self.name)
55
+ return str(object=self.display_name or self.name)
50
56
 
51
57
  def get_first_names(self):
52
58
  return self.first_names.all()
@@ -383,37 +383,42 @@ class RawPdfFile(models.Model):
383
383
  new_file_name, _uuid = get_uuid_filename(file_path)
384
384
  logger.info(f"Generated new filename: {new_file_name}")
385
385
 
386
- # Create model instance (without file initially)
387
- raw_pdf = cls(
388
- pdf_hash=pdf_hash,
389
- center=center,
390
- )
391
-
392
- # Assign file using Django's File wrapper and save
386
+ # Create model instance via manager so creation can be intercepted/mocked during tests
393
387
  try:
394
388
  with file_path.open("rb") as f:
395
389
  django_file = File(f, name=new_file_name)
396
- raw_pdf.file = django_file # type: ignore # Assign the file object
397
- # Save the instance - Django storage handles the file copy/move
398
- raw_pdf.save()
399
- _file = raw_pdf.file
400
- assert _file is not None
401
- logger.info(f"Created and saved new RawPdfFile {raw_pdf.pk} with file {_file.name}")
402
-
403
- # Verify file exists in storage after save
404
- if not _file.storage.exists(_file.name):
405
- logger.error(f"File was not saved correctly to storage path {_file.name} after model save.")
406
- raise IOError(f"File not found at expected storage path after save: {_file.name}")
407
- # Log the absolute path for debugging if possible (depends on storage)
408
- try:
409
- logger.info(f"File saved to absolute path: {_file.path}")
410
- except NotImplementedError:
411
- logger.info(f"File saved to storage path: {_file.name} (Absolute path not available from storage)")
390
+ raw_pdf = cls.objects.create(
391
+ pdf_hash=pdf_hash,
392
+ center=center,
393
+ file=django_file,
394
+ )
395
+
396
+ _file = raw_pdf.file
397
+ assert _file is not None
398
+ logger.info(
399
+ "Created and saved new RawPdfFile %s with file %s", raw_pdf.pk, _file.name
400
+ )
401
+
402
+ if not _file.storage.exists(_file.name):
403
+ logger.error(
404
+ "File was not saved correctly to storage path %s after model save.",
405
+ _file.name,
406
+ )
407
+ raise IOError(
408
+ f"File not found at expected storage path after save: {_file.name}"
409
+ )
410
+
411
+ try:
412
+ logger.info("File saved to absolute path: %s", _file.path)
413
+ except NotImplementedError:
414
+ logger.info(
415
+ "File saved to storage path: %s (Absolute path not available from storage)",
416
+ _file.name,
417
+ )
412
418
 
413
419
  except Exception as e:
414
- logger.error(f"Error processing or saving file {file_path} for new record: {e}")
415
- # If save failed, the instance might be partially created but not fully saved.
416
- raise # Re-raise the exception
420
+ logger.error("Error processing or saving file %s for new record: %s", file_path, e)
421
+ raise
417
422
 
418
423
  # Delete source file *after* successful save and verification
419
424
  if delete_source:
@@ -6,7 +6,8 @@ from typing import TYPE_CHECKING, Optional, Type
6
6
 
7
7
  # Import the new exceptions from the correct path
8
8
  from endoreg_db.exceptions import InsufficientStorageError, TranscodingError
9
- from ...utils import VIDEO_DIR, TMP_VIDEO_DIR, data_paths
9
+ from ...utils import VIDEO_DIR, TMP_VIDEO_DIR
10
+ from importlib import import_module
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from endoreg_db.models import VideoFile
@@ -170,6 +171,22 @@ def atomic_move_with_fallback(src_path: Path, dst_path: Path) -> bool:
170
171
  raise
171
172
 
172
173
 
174
+ def _get_data_paths():
175
+ """Return the current data_paths mapping (supports patched instances in tests)."""
176
+ utils_module = import_module("endoreg_db.utils")
177
+ return getattr(utils_module, "data_paths")
178
+
179
+
180
+ def _get_path(mapping, key, default):
181
+ """Access mapping by key using __getitem__ so MagicMocks with side effects work."""
182
+ if mapping is None:
183
+ return default
184
+ try:
185
+ return mapping[key]
186
+ except (KeyError, TypeError):
187
+ return default
188
+
189
+
173
190
  def _create_from_file(
174
191
  cls_model: Type["VideoFile"],
175
192
  file_path: Path,
@@ -199,8 +216,12 @@ def _create_from_file(
199
216
 
200
217
  try:
201
218
  # Ensure we operate under the canonical video path root
202
- video_dir = data_paths.get("video", video_dir)
203
- storage_root = Path(video_dir).parent
219
+ data_paths = _get_data_paths()
220
+ resolved_video_dir = _get_path(data_paths, "video", video_dir)
221
+ video_dir = Path(resolved_video_dir)
222
+ storage_root_default = Path(video_dir).parent
223
+ resolved_storage_root = _get_path(data_paths, "storage", storage_root_default)
224
+ storage_root = Path(resolved_storage_root)
204
225
  storage_root.mkdir(parents=True, exist_ok=True)
205
226
 
206
227
  # Check storage capacity before starting any work
@@ -300,7 +321,8 @@ def _create_from_file(
300
321
  # 8. Create the VideoFile instance
301
322
  logger.info("Creating new VideoFile instance with UUID: %s", uuid_val)
302
323
  # Store FileField path relative to storage root including the videos prefix
303
- relative_name = (final_storage_path.relative_to(data_paths['storage'])).as_posix()
324
+ storage_base = Path(_get_path(data_paths, "storage", final_storage_path.parent))
325
+ relative_name = (final_storage_path.relative_to(storage_base)).as_posix()
304
326
  video = cls_model(
305
327
  uuid=uuid_val,
306
328
  raw_file=relative_name,
@@ -126,7 +126,7 @@ class VideoFile(models.Model):
126
126
 
127
127
  sensitive_meta = models.OneToOneField(
128
128
  "SensitiveMeta", on_delete=models.SET_NULL, null=True, blank=True, related_name="video_file"
129
- )
129
+ ) # type: ignore
130
130
  center = models.ForeignKey("Center", on_delete=models.PROTECT)
131
131
  processor = models.ForeignKey(
132
132
  "EndoscopyProcessor", on_delete=models.PROTECT, blank=True, null=True
@@ -465,7 +465,18 @@ class VideoFile(models.Model):
465
465
  # Use proper database connection
466
466
  if using is None:
467
467
  using = 'default'
468
-
468
+
469
+ raw_file_path = self.get_raw_file_path()
470
+ if raw_file_path:
471
+ raw_file_path = Path(raw_file_path)
472
+ lock_path = raw_file_path.with_suffix(raw_file_path.suffix + ".lock")
473
+ if lock_path.exists():
474
+ try:
475
+ lock_path.unlink()
476
+ logger.info(f"Removed processing lock: {lock_path}")
477
+ except Exception as e:
478
+ logger.warning(f"Could not remove processing lock {lock_path}: {e}")
479
+
469
480
  try:
470
481
  # Call parent delete with proper parameters
471
482
  super().delete(using=using, keep_parents=keep_parents)
@@ -572,15 +583,28 @@ class VideoFile(models.Model):
572
583
  super().save(*args, **kwargs)
573
584
 
574
585
  def get_or_create_state(self) -> "VideoState":
575
- """
576
- Return the related VideoState instance for this video, creating and assigning a new one if none exists.
577
-
578
- Returns:
579
- VideoState: The associated VideoState instance.
580
- """
581
- if self.state is None:
582
- self.state = VideoState.objects.create()
583
- return self.state
586
+ """Ensure this video has a persisted ``VideoState`` and return it."""
587
+
588
+ state = self.state
589
+
590
+ # When tests reuse cached instances across database flushes, ``state`` may reference
591
+ # a row that no longer exists. Guard against that by validating persistence.
592
+ state_pk = getattr(state, "pk", None)
593
+ if state is not None and state_pk is not None:
594
+ if not VideoState.objects.filter(pk=state_pk).exists():
595
+ state = None
596
+
597
+ if state is None:
598
+ # Create a fresh state to avoid refresh_from_db() failures on unsaved instances.
599
+ state = VideoState.objects.create()
600
+ self.state = state
601
+
602
+ # Persist the relation immediately if the VideoFile already exists in the DB so
603
+ # later refreshes see the association without requiring additional saves.
604
+ if self.pk:
605
+ self.save(update_fields=["state"])
606
+
607
+ return state
584
608
 
585
609
  def get_or_create_sensitive_meta(self) -> "SensitiveMeta":
586
610
  """
@@ -592,8 +616,7 @@ class VideoFile(models.Model):
592
616
  from endoreg_db.models import SensitiveMeta
593
617
  if self.sensitive_meta is None:
594
618
  self.sensitive_meta = SensitiveMeta.objects.create(center = self.center)
595
- # Mark as processed when creating new SensitiveMeta
596
- self.get_or_create_state().mark_sensitive_meta_processed(save=True)
619
+ # Do not mark processed here; it will be set after extraction/validation steps
597
620
  return self.sensitive_meta
598
621
 
599
622
  def get_outside_segments(self, only_validated: bool = False) -> models.QuerySet["LabelVideoSegment"]:
@@ -12,6 +12,7 @@ from django.conf import settings
12
12
 
13
13
  from endoreg_db.utils.hashs import get_video_hash
14
14
  from endoreg_db.utils.validate_endo_roi import validate_endo_roi
15
+ from endoreg_db.utils.paths import STORAGE_DIR
15
16
  from ....utils.video.ffmpeg_wrapper import assemble_video_from_frames
16
17
  from ...utils import anonymize_frame # Import from models.utils
17
18
  from .video_file_segments import _get_outside_frames, _get_outside_frame_numbers
@@ -268,7 +269,7 @@ def _anonymize(video: "VideoFile", delete_original_raw: bool = True) -> bool:
268
269
  raise ValueError(f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid}).")
269
270
 
270
271
  video.processed_video_hash = new_processed_hash
271
- video.processed_file.name = video.get_target_anonymized_video_path().relative_to(settings.MEDIA_ROOT).as_posix()
272
+ video.processed_file.name = video.get_target_anonymized_video_path().relative_to(STORAGE_DIR).as_posix()
272
273
 
273
274
  update_fields = [
274
275
  "processed_video_hash",
@@ -97,6 +97,7 @@ def _extract_frame_range(
97
97
  return True # Indicate success as frames are considered present
98
98
 
99
99
  frame_dir.mkdir(parents=True, exist_ok=True)
100
+ extracted_paths = []
100
101
 
101
102
  try:
102
103
  logger.info("Starting frame range extraction [%d, %d) for video %s to %s", start_frame, end_frame, video.uuid, frame_dir)
@@ -111,6 +112,17 @@ def _extract_frame_range(
111
112
 
112
113
  return True
113
114
 
115
+ except FileNotFoundError as err:
116
+ logger.error(
117
+ "Frame range extraction [%d, %d) failed for video %s: %s",
118
+ start_frame,
119
+ end_frame,
120
+ video.uuid,
121
+ err,
122
+ exc_info=True,
123
+ )
124
+ raise
125
+
114
126
  except Exception as e:
115
127
  logger.error("Frame range extraction [%d, %d) or DB update failed for video %s: %s", start_frame, end_frame, video.uuid, e, exc_info=True)
116
128
 
@@ -32,13 +32,15 @@ def _get_raw_file_path(video: "VideoFile") -> Optional[Path]:
32
32
  if sensitive_path.exists():
33
33
  return sensitive_path.resolve()
34
34
 
35
+ # Check direct raw_file.path if available
35
36
  # Check direct raw_file.path if available
36
37
  try:
37
38
  direct_path = Path(video.raw_file.path)
38
39
  if direct_path.exists():
39
40
  return direct_path.resolve()
40
- except Exception:
41
- pass # Fallback to original behavior
41
+ except Exception as e:
42
+ logger.debug("Could not access direct raw_file.path for video %s: %s", video.uuid, e)
43
+ # Fallback to checking alternative paths
42
44
 
43
45
  # Check common alternative paths
44
46
  alternative_paths = [
@@ -13,7 +13,7 @@ else:
13
13
  ENDOREG_CENTER_ID = settings.ENDOREG_CENTER_ID
14
14
 
15
15
  # Import the new utility function
16
- from ...utils.video.ffmpeg_wrapper import get_stream_info
16
+ from ...utils.video import ffmpeg_wrapper
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -214,7 +214,7 @@ class FFMpegMeta(models.Model):
214
214
  """
215
215
  logger.info("Running ffprobe on %s", file_path)
216
216
  try:
217
- probe_data = get_stream_info(file_path) # Use the new utility
217
+ probe_data = ffmpeg_wrapper.get_stream_info(file_path) # Use the new utility
218
218
  except Exception as probe_err:
219
219
  logger.error("ffprobe execution failed for %s: %s", file_path, probe_err, exc_info=True)
220
220
  raise RuntimeError(f"ffprobe execution failed for {file_path}") from probe_err
@@ -5,6 +5,7 @@ Provides high-level functions for importing and anonymizing PDF files,
5
5
  combining RawPdfFile creation with text extraction and anonymization.
6
6
  """
7
7
  from datetime import date, datetime
8
+ import errno
8
9
  import logging
9
10
  import shutil
10
11
  import sys
@@ -13,12 +14,11 @@ import hashlib
13
14
  from pathlib import Path
14
15
  from typing import TYPE_CHECKING, Union
15
16
  from contextlib import contextmanager
16
- from django.conf.locale import tr
17
17
  from django.db import transaction
18
18
  from endoreg_db.models.media.pdf.raw_pdf import RawPdfFile
19
19
  from endoreg_db.models.state.raw_pdf import RawPdfState
20
20
  from endoreg_db.models import SensitiveMeta
21
- from endoreg_db.utils.paths import PDF_DIR, STORAGE_DIR
21
+ from endoreg_db.utils import paths as path_utils
22
22
  import time
23
23
 
24
24
  logger = logging.getLogger(__name__)
@@ -111,14 +111,44 @@ class PdfImportService:
111
111
  break
112
112
  h.update(b)
113
113
  return h.hexdigest()
114
+
115
+ def _get_pdf_dir(self) -> Path | None:
116
+ """Resolve the configured PDF directory to a concrete Path."""
117
+ candidate = getattr(path_utils, "PDF_DIR", None)
118
+ if isinstance(candidate, Path):
119
+ return candidate
120
+ if candidate is None:
121
+ return None
122
+ try:
123
+ derived = candidate / "."
124
+ except Exception:
125
+ derived = None
126
+
127
+ if derived is not None:
128
+ try:
129
+ return Path(derived)
130
+ except Exception:
131
+ return None
132
+
133
+ try:
134
+ return Path(str(candidate))
135
+ except Exception:
136
+ return None
114
137
 
115
138
  def _quarantine(self, source: Path) -> Path:
116
139
  """Move file to quarantine directory to prevent re-processing."""
117
- qdir = PDF_DIR / "_processing"
140
+ qdir = path_utils.PDF_DIR / "_processing"
118
141
  qdir.mkdir(parents=True, exist_ok=True)
119
142
  target = qdir / source.name
120
- # atomic rename on same filesystem
121
- source.rename(target)
143
+ try:
144
+ # Try atomic rename first (fastest when on same filesystem)
145
+ source.rename(target)
146
+ except OSError as exc:
147
+ if exc.errno == errno.EXDEV:
148
+ # Cross-device move, fall back to shutil.move which copies+removes
149
+ shutil.move(str(source), str(target))
150
+ else:
151
+ raise
122
152
  return target
123
153
 
124
154
  def _ensure_state(self, pdf_file: "RawPdfFile"):
@@ -287,6 +317,7 @@ class PdfImportService:
287
317
  """Initialize the processing context for the current PDF."""
288
318
  self.processing_context = {
289
319
  'file_path': Path(file_path),
320
+ 'original_file_path': Path(file_path),
290
321
  'center_name': center_name,
291
322
  'delete_source': delete_source,
292
323
  'retry': retry,
@@ -379,11 +410,18 @@ class PdfImportService:
379
410
 
380
411
  def _setup_processing_environment(self):
381
412
  """Setup processing environment and state."""
413
+ original_path = self.processing_context.get('file_path')
414
+
382
415
  # Create sensitive file copy
383
- self.create_sensitive_file(self.current_pdf, self.processing_context['file_path'])
416
+ self.create_sensitive_file(self.current_pdf, original_path)
384
417
 
385
418
  # Update file path to point to sensitive copy
386
419
  self.processing_context['file_path'] = self.current_pdf.file.path
420
+ self.processing_context['sensitive_copy_created'] = True
421
+ try:
422
+ self.processing_context['sensitive_file_path'] = Path(self.current_pdf.file.path)
423
+ except Exception:
424
+ self.processing_context['sensitive_file_path'] = None
387
425
 
388
426
  # Ensure state exists
389
427
  state = self.current_pdf.get_or_create_state()
@@ -415,14 +453,14 @@ class PdfImportService:
415
453
  logger.info("Starting text extraction and metadata processing with ReportReader...")
416
454
 
417
455
  # Setup output directories
418
- crops_dir = PDF_DIR / 'cropped_regions'
419
- anonymized_dir = PDF_DIR / 'anonymized'
456
+ crops_dir = path_utils.PDF_DIR / 'cropped_regions'
457
+ anonymized_dir = path_utils.PDF_DIR / 'anonymized'
420
458
  crops_dir.mkdir(parents=True, exist_ok=True)
421
459
  anonymized_dir.mkdir(parents=True, exist_ok=True)
422
460
 
423
461
  # Initialize ReportReader
424
462
  report_reader = ReportReader(
425
- report_root_path=STORAGE_DIR,
463
+ report_root_path=str(path_utils.STORAGE_DIR),
426
464
  locale="de_DE",
427
465
  text_date_format="%d.%m.%Y"
428
466
  )
@@ -603,7 +641,7 @@ class PdfImportService:
603
641
  try:
604
642
  # Prefer storing a path relative to STORAGE_DIR so Django serves it correctly
605
643
  try:
606
- relative_name = str(anonymized_path.relative_to(STORAGE_DIR))
644
+ relative_name = str(anonymized_path.relative_to(path_utils.STORAGE_DIR))
607
645
  except ValueError:
608
646
  # Fallback to absolute path if the file lives outside STORAGE_DIR
609
647
  relative_name = str(anonymized_path)
@@ -717,18 +755,96 @@ class PdfImportService:
717
755
  except Exception as e:
718
756
  logger.warning(f"Error during cleanup: {e}")
719
757
  finally:
758
+ # Remove any sensitive copy created during this processing run
759
+ sensitive_created = self.processing_context.get('sensitive_copy_created')
760
+ if sensitive_created:
761
+ pdf_obj = self.current_pdf
762
+ try:
763
+ if pdf_obj:
764
+ file_field = getattr(pdf_obj, "file", None)
765
+ if file_field and getattr(file_field, "name", None):
766
+ storage_name = file_field.name
767
+ file_field.delete(save=False)
768
+ logger.debug("Deleted sensitive copy %s during error cleanup", storage_name)
769
+ except Exception as cleanup_exc:
770
+ logger.warning("Failed to remove sensitive copy during error cleanup: %s", cleanup_exc)
771
+
720
772
  # Always clean up processed files set to prevent blocks
721
773
  file_path = self.processing_context.get('file_path')
722
774
  if file_path and str(file_path) in self.processed_files:
723
775
  self.processed_files.remove(str(file_path))
724
776
  logger.debug(f"Removed {file_path} from processed files during error cleanup")
725
777
 
778
+ try:
779
+ original_path = self.processing_context.get('original_file_path')
780
+ logger.debug("PDF cleanup original path: %s (%s)", original_path, type(original_path))
781
+ raw_dir = original_path.parent if isinstance(original_path, Path) else None
782
+ if (
783
+ isinstance(original_path, Path)
784
+ and original_path.exists()
785
+ and not self.processing_context.get('sensitive_copy_created')
786
+ ):
787
+ try:
788
+ original_path.unlink()
789
+ logger.info("Removed original file %s during error cleanup", original_path)
790
+ except Exception as remove_exc:
791
+ logger.warning("Could not remove original file %s during error cleanup: %s", original_path, remove_exc)
792
+ pdf_dir = self._get_pdf_dir()
793
+ if not pdf_dir and raw_dir:
794
+ base_dir = raw_dir.parent
795
+ dir_name = getattr(path_utils, "PDF_DIR_NAME", "pdfs")
796
+ fallback_pdf_dir = base_dir / dir_name
797
+ logger.debug(
798
+ "PDF cleanup fallback resolution - base: %s, dir_name: %s, exists: %s",
799
+ base_dir,
800
+ dir_name,
801
+ fallback_pdf_dir.exists(),
802
+ )
803
+ if fallback_pdf_dir.exists():
804
+ pdf_dir = fallback_pdf_dir
805
+
806
+ # Remove empty PDF subdirectories that might have been created during setup
807
+ if pdf_dir and pdf_dir.exists():
808
+ for subdir_name in ("sensitive", "cropped_regions", "anonymized", "_processing"):
809
+ subdir_path = pdf_dir / subdir_name
810
+ if subdir_path.exists() and subdir_path.is_dir():
811
+ try:
812
+ next(subdir_path.iterdir())
813
+ except StopIteration:
814
+ try:
815
+ subdir_path.rmdir()
816
+ logger.debug("Removed empty directory %s during error cleanup", subdir_path)
817
+ except OSError as rm_err:
818
+ logger.debug("Could not remove directory %s: %s", subdir_path, rm_err)
819
+ except Exception as iter_err:
820
+ logger.debug("Could not inspect directory %s: %s", subdir_path, iter_err)
821
+
822
+ raw_count = len(list(raw_dir.glob("*"))) if raw_dir and raw_dir.exists() else None
823
+ pdf_count = len(list(pdf_dir.glob("*"))) if pdf_dir and pdf_dir.exists() else None
824
+
825
+ sensitive_path = self.processing_context.get('sensitive_file_path')
826
+ if sensitive_path:
827
+ sensitive_parent = Path(sensitive_path).parent
828
+ sensitive_count = len(list(sensitive_parent.glob("*"))) if sensitive_parent.exists() else None
829
+ else:
830
+ sensitive_dir = pdf_dir / "sensitive" if pdf_dir else None
831
+ sensitive_count = len(list(sensitive_dir.glob("*"))) if sensitive_dir and sensitive_dir.exists() else None
832
+
833
+ logger.info(
834
+ "PDF import error cleanup counts - raw: %s, pdf: %s, sensitive: %s",
835
+ raw_count,
836
+ pdf_count,
837
+ sensitive_count,
838
+ )
839
+ except Exception:
840
+ pass
841
+
726
842
  def _cleanup_processing_context(self):
727
843
  """Cleanup processing context."""
728
844
  try:
729
845
  # Clean up temporary directories
730
846
  if self.processing_context.get('text_extracted'):
731
- crops_dir = PDF_DIR / 'cropped_regions'
847
+ crops_dir = path_utils.PDF_DIR / 'cropped_regions'
732
848
  if crops_dir.exists() and not any(crops_dir.iterdir()):
733
849
  crops_dir.rmdir()
734
850
 
@@ -857,7 +973,7 @@ class PdfImportService:
857
973
  if not source_path:
858
974
  raise ValueError("No file path available for creating sensitive file")
859
975
 
860
- SENSITIVE_DIR = PDF_DIR / "sensitive"
976
+ SENSITIVE_DIR = path_utils.PDF_DIR / "sensitive"
861
977
  target = SENSITIVE_DIR / f"{pdf_file.pdf_hash}.pdf"
862
978
 
863
979
  try:
@@ -880,7 +996,7 @@ class PdfImportService:
880
996
  # Update FileField to reference the file under STORAGE_DIR
881
997
  # We avoid re-saving file content (the file is already at target); set .name relative to STORAGE_DIR
882
998
  try:
883
- relative_name = str(target.relative_to(STORAGE_DIR)) #just point the Django FileField to the file that the anonymizer already created in data/pdfs/anonymized/.
999
+ relative_name = str(target.relative_to(path_utils.STORAGE_DIR)) # Point Django FileField to sensitive storage
884
1000
  except ValueError:
885
1001
  # Fallback: if target is not under STORAGE_DIR, store absolute path (not ideal)
886
1002
  relative_name = str(target)
@@ -934,7 +1050,7 @@ class PdfImportService:
934
1050
  if pdf_problematic:
935
1051
  # Quarantine the file
936
1052
  logger.warning(f"Quarantining problematic PDF: {pdf_file.pdf_hash}, reason: {quarantine_reason}")
937
- quarantine_dir = PDF_DIR / "quarantine"
1053
+ quarantine_dir = path_utils.PDF_DIR / "quarantine"
938
1054
  os.makedirs(quarantine_dir, exist_ok=True)
939
1055
 
940
1056
  quarantine_path = quarantine_dir / f"{pdf_file.pdf_hash}.pdf"
@@ -950,7 +1066,7 @@ class PdfImportService:
950
1066
  else:
951
1067
  # Archive the file normally
952
1068
  logger.info(f"Archiving successfully processed PDF: {pdf_file.pdf_hash}")
953
- archive_dir = PDF_DIR / "processed"
1069
+ archive_dir = path_utils.PDF_DIR / "processed"
954
1070
  os.makedirs(archive_dir, exist_ok=True)
955
1071
 
956
1072
  archive_path = archive_dir / f"{pdf_file.pdf_hash}.pdf"