endoreg-db 0.8.5.3__py3-none-any.whl → 0.8.5.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. More details are available through the link on the original diff page.

@@ -1,19 +1,22 @@
1
- from rest_framework.views import APIView
2
- from rest_framework.response import Response
3
- from rest_framework import status
4
1
  import logging
5
- from pathlib import Path
2
+
6
3
  from django.db import transaction
4
+ from rest_framework import status
5
+ from rest_framework.response import Response
6
+ from rest_framework.views import APIView
7
+
7
8
  from ...models import RawPdfFile, SensitiveMeta
8
9
  from ...services.pdf_import import PdfImportService
10
+
9
11
  logger = logging.getLogger(__name__)
10
12
 
13
+
11
14
  class PdfReimportView(APIView):
12
15
  """
13
16
  API endpoint to re-import a pdf file and regenerate metadata.
14
17
  This is useful when OCR failed or metadata is incomplete.
15
18
  """
16
-
19
+
17
20
  def __init__(self, **kwargs):
18
21
  super().__init__(**kwargs)
19
22
  self.pdf_service = PdfImportService()
@@ -22,140 +25,153 @@ class PdfReimportView(APIView):
22
25
  """
23
26
  Re-import a pdf file to regenerate SensitiveMeta and other metadata.
24
27
  Instead of creating a new pdf, this updates the existing one.
25
-
28
+
26
29
  Args:
27
30
  request: HTTP request object
28
31
  pk: PDF primary key (ID)
29
32
  """
30
33
  pdf_id = pk # Align with media framework naming convention
31
-
34
+
32
35
  # Validate pdf_id parameter
33
36
  if not pdf_id or not isinstance(pdf_id, int):
34
37
  return Response(
35
- {"error": "Invalid PDF ID provided."},
36
- status=status.HTTP_400_BAD_REQUEST
38
+ {"error": "Invalid PDF ID provided."},
39
+ status=status.HTTP_400_BAD_REQUEST,
37
40
  )
38
41
 
39
42
  try:
40
43
  pdf = RawPdfFile.objects.get(id=pdf_id)
41
- logger.info(f"Found PDF {pdf.uuid} (ID: {pdf_id}) for re-import")
44
+ logger.info(f"Found PDF {pdf.pdf_hash} (ID: {pdf_id}) for re-import")
42
45
  except RawPdfFile.DoesNotExist:
43
46
  logger.warning(f"PDF with ID {pdf_id} not found")
44
47
  return Response(
45
- {"error": f"PDF with ID {pdf_id} not found."},
46
- status=status.HTTP_404_NOT_FOUND
48
+ {"error": f"PDF with ID {pdf_id} not found."},
49
+ status=status.HTTP_404_NOT_FOUND,
47
50
  )
48
51
 
52
+ # Get raw file path using the model method
53
+ raw_file_path = pdf.get_raw_file_path()
49
54
 
50
-
51
- # Check if the raw file actually exists on disk
52
- raw_file_path = Path(pdf.file.path)
53
- if not raw_file_path.exists():
54
- logger.error(f"Raw file not found on disk: {raw_file_path}")
55
+ if not raw_file_path or not raw_file_path.exists():
56
+ logger.error(
57
+ f"Raw PDF file not found for hash {pdf.pdf_hash}: {raw_file_path}"
58
+ )
55
59
  return Response(
56
- {"error": f"PDF file not found on server: {raw_file_path.name}"},
57
- status=status.HTTP_400_BAD_REQUEST
60
+ {
61
+ "error": f"Raw PDF file not found for PDF {pdf.pdf_hash}. Please upload the original file again."
62
+ },
63
+ status=status.HTTP_404_NOT_FOUND,
58
64
  )
59
65
 
60
66
  # Check if PDF has required relationships
61
67
  if not pdf.center:
62
- logger.warning(f"PDF {pdf.uuid} has no associated center")
68
+ logger.warning(f"PDF {pdf.pdf_hash} has no associated center")
63
69
  return Response(
64
- {"error": "Video has no associated center."},
65
- status=status.HTTP_400_BAD_REQUEST
70
+ {"error": "PDF has no associated center."},
71
+ status=status.HTTP_400_BAD_REQUEST,
66
72
  )
67
73
 
68
74
  try:
69
- logger.info(f"Starting in-place re-import for pdf {pdf.uuid} (ID: {pdf_id})")
70
-
75
+ logger.info(f"Starting re-import for PDF {pdf.pdf_hash} (ID: {pdf_id})")
76
+
71
77
  with transaction.atomic():
72
78
  # Clear existing metadata to force regeneration
73
79
  old_meta_id = None
74
80
  if pdf.sensitive_meta:
75
- old_meta_id = pdf.sensitive_meta.id
76
- logger.info(f"Clearing existing SensitiveMeta {old_meta_id} for pdf {pdf.uuid}")
77
- pdf.sensitive_meta = None
78
- pdf.save(update_fields=['sensitive_meta'])
79
-
81
+ old_meta_id = pdf.sensitive_meta.pk
82
+ logger.info(
83
+ f"Clearing existing SensitiveMeta {old_meta_id} for PDF {pdf.pdf_hash}"
84
+ )
85
+ pdf.sensitive_meta = None # type: ignore
86
+ pdf.save(update_fields=["sensitive_meta"])
87
+
80
88
  # Delete the old SensitiveMeta record
81
89
  try:
82
- SensitiveMeta.objects.filter(id=old_meta_id).delete()
90
+ SensitiveMeta.objects.filter(pk=old_meta_id).delete()
83
91
  logger.info(f"Deleted old SensitiveMeta {old_meta_id}")
84
92
  except Exception as e:
85
- logger.warning(f"Could not delete old SensitiveMeta {old_meta_id}: {e}")
86
-
87
-
88
-
89
-
90
-
91
- # Ensure minimum patient data is available
92
- logger.info(f"Ensuring minimum patient data for {pdf.uuid}")
93
- self.pdf_service._ensure_default_patient_data(pdf)
94
-
95
- # Refresh from database to get updated data
96
- pdf.refresh_from_db()
97
-
98
- # Use VideoImportService for anonymization
93
+ logger.warning(
94
+ f"Could not delete old SensitiveMeta {old_meta_id}: {e}"
95
+ )
96
+
97
+ # Use PdfImportService for reprocessing
99
98
  try:
100
-
101
- logger.info(f"Starting anonymization using VideoImportService for {pdf.uuid}")
99
+ logger.info(
100
+ f"Starting reprocessing using PdfImportService for {pdf.pdf_hash}"
101
+ )
102
102
  self.pdf_service.import_and_anonymize(
103
103
  file_path=raw_file_path,
104
104
  center_name=pdf.center.name,
105
- processor_name=pdf.processor.name if pdf.processor else "Unknown",
106
- save_video=True,
107
- delete_source=False
105
+ delete_source=False, # Don't delete during reimport
106
+ retry=True, # Mark as retry attempt
108
107
  )
109
-
110
- logger.info(f"VideoImportService anonymization completed for {pdf.uuid}")
111
-
112
-
113
- return Response({
114
- "message": "Video re-import with VideoImportService completed successfully.",
115
- "pdf_id": pdf_id,
116
- "uuid": str(pdf.uuid),
117
- "frame_cleaning_applied": True,
118
- "sensitive_meta_created": pdf.sensitive_meta is not None,
119
- "sensitive_meta_id": pdf.sensitive_meta.id if pdf.sensitive_meta else None,
120
- "updated_in_place": True,
121
- "status": "done"
122
- }, status=status.HTTP_200_OK)
123
-
108
+
109
+ logger.info(
110
+ f"PdfImportService reprocessing completed for {pdf.pdf_hash}"
111
+ )
112
+
113
+ # Refresh to get updated state
114
+ pdf.refresh_from_db()
115
+
116
+ return Response(
117
+ {
118
+ "message": "PDF re-import completed successfully.",
119
+ "pdf_id": pdf_id,
120
+ "pdf_hash": str(pdf.pdf_hash),
121
+ "sensitive_meta_created": pdf.sensitive_meta is not None,
122
+ "sensitive_meta_id": pdf.sensitive_meta.pk
123
+ if pdf.sensitive_meta
124
+ else None,
125
+ "text_extracted": bool(pdf.text),
126
+ "anonymized": pdf.anonymized,
127
+ "status": "done",
128
+ },
129
+ status=status.HTTP_200_OK,
130
+ )
131
+
124
132
  except Exception as e:
125
- logger.exception(f"VideoImportService anonymization failed for pdf {pdf.uuid}: {e}")
126
- logger.warning("Continuing without anonymization due to error")
127
-
128
- # Refresh from database to get final state
129
- pdf.refresh_from_db()
130
-
131
- return Response({
132
- "message": "PDF re-import completed successfully.",
133
- "pdf_id": pdf_id,
134
- "uuid": str(pdf.uuid),
135
- "sensitive_meta_created": pdf.sensitive_meta is not None,
136
- "sensitive_meta_id": pdf.sensitive_meta.id if pdf.sensitive_meta else None,
137
- "updated_in_place": True,
138
- "status": "done"
139
- }, status=status.HTTP_200_OK)
133
+ logger.exception(
134
+ f"PdfImportService reprocessing failed for PDF {pdf.pdf_hash}: {e}"
135
+ )
136
+ return Response(
137
+ {
138
+ "error": f"Reprocessing failed: {str(e)}",
139
+ "error_type": "processing_error",
140
+ "pdf_id": pdf_id,
141
+ "pdf_hash": str(pdf.pdf_hash),
142
+ },
143
+ status=status.HTTP_500_INTERNAL_SERVER_ERROR,
144
+ )
140
145
 
141
146
  except Exception as e:
142
- logger.error(f"Failed to re-import pdf {pdf.uuid}: {str(e)}", exc_info=True)
143
-
147
+ logger.error(
148
+ f"Failed to re-import PDF {pdf.pdf_hash}: {str(e)}", exc_info=True
149
+ )
150
+
144
151
  # Handle specific error types
145
152
  error_msg = str(e)
146
- if any(phrase in error_msg.lower() for phrase in ["insufficient storage", "no space left", "disk full"]):
153
+ if any(
154
+ phrase in error_msg.lower()
155
+ for phrase in ["insufficient storage", "no space left", "disk full"]
156
+ ):
147
157
  # Storage error - return specific error message
148
- return Response({
149
- "error": f"Storage error during re-import: {error_msg}",
150
- "error_type": "storage_error",
151
- "pdf_id": pdf_id,
152
- "uuid": str(pdf.uuid)
153
- }, status=status.HTTP_507_INSUFFICIENT_STORAGE)
158
+ return Response(
159
+ {
160
+ "error": f"Storage error during re-import: {error_msg}",
161
+ "error_type": "storage_error",
162
+ "pdf_id": pdf_id,
163
+ "pdf_hash": str(pdf.pdf_hash),
164
+ },
165
+ status=status.HTTP_507_INSUFFICIENT_STORAGE,
166
+ )
154
167
  else:
155
168
  # Other errors
156
- return Response({
157
- "error": f"Re-import failed: {error_msg}",
158
- "error_type": "processing_error",
159
- "pdf_id": pdf_id,
160
- "uuid": str(pdf.uuid)
161
- }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
169
+ return Response(
170
+ {
171
+ "error": f"Re-import failed: {error_msg}",
172
+ "error_type": "processing_error",
173
+ "pdf_id": pdf_id,
174
+ "pdf_hash": str(pdf.pdf_hash),
175
+ },
176
+ status=status.HTTP_500_INTERNAL_SERVER_ERROR,
177
+ )