endoreg-db 0.8.3.3__py3-none-any.whl → 0.8.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +23 -1
- endoreg_db/data/setup_config.yaml +38 -0
- endoreg_db/management/commands/create_model_meta_from_huggingface.py +1 -2
- endoreg_db/management/commands/load_ai_model_data.py +18 -15
- endoreg_db/management/commands/setup_endoreg_db.py +218 -33
- endoreg_db/models/media/pdf/raw_pdf.py +241 -97
- endoreg_db/models/media/video/pipe_1.py +30 -33
- endoreg_db/models/media/video/video_file.py +300 -187
- endoreg_db/models/medical/hardware/endoscopy_processor.py +10 -1
- endoreg_db/models/metadata/model_meta_logic.py +34 -45
- endoreg_db/models/metadata/sensitive_meta_logic.py +555 -150
- endoreg_db/serializers/__init__.py +26 -55
- endoreg_db/serializers/misc/__init__.py +1 -1
- endoreg_db/serializers/misc/file_overview.py +65 -35
- endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
- endoreg_db/serializers/video_examination.py +198 -0
- endoreg_db/services/lookup_service.py +228 -58
- endoreg_db/services/lookup_store.py +174 -30
- endoreg_db/services/pdf_import.py +585 -282
- endoreg_db/services/video_import.py +493 -240
- endoreg_db/urls/__init__.py +36 -23
- endoreg_db/urls/label_video_segments.py +2 -0
- endoreg_db/urls/media.py +103 -66
- endoreg_db/utils/setup_config.py +177 -0
- endoreg_db/views/__init__.py +5 -3
- endoreg_db/views/media/pdf_media.py +3 -1
- endoreg_db/views/media/video_media.py +1 -1
- endoreg_db/views/media/video_segments.py +187 -259
- endoreg_db/views/pdf/__init__.py +5 -8
- endoreg_db/views/pdf/pdf_stream.py +186 -0
- endoreg_db/views/pdf/reimport.py +110 -94
- endoreg_db/views/requirement/lookup.py +171 -287
- endoreg_db/views/video/__init__.py +0 -2
- endoreg_db/views/video/video_examination_viewset.py +202 -289
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/METADATA +1 -2
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/RECORD +38 -37
- endoreg_db/views/pdf/pdf_media.py +0 -239
- endoreg_db/views/pdf/pdf_stream_views.py +0 -127
- endoreg_db/views/video/video_media.py +0 -158
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/WHEEL +0 -0
- {endoreg_db-0.8.3.3.dist-info → endoreg_db-0.8.6.5.dist-info}/licenses/LICENSE +0 -0
endoreg_db/views/pdf/__init__.py
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
from .
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
ClosingFileWrapper
|
|
5
|
-
)
|
|
1
|
+
from .reimport import PdfReimportView
|
|
2
|
+
from .pdf_stream import PdfStreamView
|
|
3
|
+
|
|
6
4
|
|
|
7
5
|
__all__ = [
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"ClosingFileWrapper",
|
|
6
|
+
"PdfReimportView",
|
|
7
|
+
"PdfStreamView",
|
|
11
8
|
]
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from django.http import FileResponse, Http404, StreamingHttpResponse
|
|
6
|
+
from django.views.decorators.clickjacking import xframe_options_exempt, xframe_options_sameorigin
|
|
7
|
+
from rest_framework.views import APIView
|
|
8
|
+
|
|
9
|
+
from endoreg_db.models import RawPdfFile
|
|
10
|
+
|
|
11
|
+
from ...utils.permissions import EnvironmentAwarePermission
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
# Parses a single-range HTTP ``Range`` header value of the form
# ``bytes=<first>-[<last>]``: group 1 is the first byte position (required),
# group 2 the last byte position (may be the empty string for an open-ended
# range). Suffix ranges such as ``bytes=-500`` do not match because the first
# position requires at least one digit. The pattern is applied with
# ``.match()``, so it is anchored at the start of the header only; anything
# following the first range (e.g. additional comma-separated ranges) is ignored.
_RANGE_RE = re.compile(r"bytes=(\d+)-(\d*)")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ClosingFileWrapper:
    """Iterate over an open binary file in blocks and close it when done.

    The wrapper is an iterator yielding ``bytes`` chunks of at most
    ``blksize`` bytes. The underlying handle is closed as soon as iteration
    finishes, and can also be closed early through :meth:`close`.

    Args:
        file_handle: Open file-like object providing ``read(size)`` and
            ``close()``. Reading starts at the handle's current position.
        blksize: Maximum number of bytes returned per iteration step.
        length: Optional upper bound on the total number of bytes to yield.
            ``None`` (the default) streams until end of file, which is the
            historical behaviour. Pass the size of a byte range to serve
            exactly that range after seeking to its start.
    """

    def __init__(self, file_handle, blksize=8192, length=None):
        self.file_handle = file_handle
        self.blksize = blksize
        # Bytes still allowed to be emitted; None means "no limit, read to EOF".
        self.remaining = None if length is None else max(0, length)
        # Set once iteration is over so later next() calls never touch the
        # (already closed) file handle again.
        self._finished = False

    def __iter__(self):
        return self

    def __next__(self):
        if self._finished:
            raise StopIteration

        size = self.blksize
        if self.remaining is not None:
            size = min(size, self.remaining)

        # A zero-sized read would be ambiguous for some file objects, so skip
        # the read entirely once the byte budget is used up.
        data = self.file_handle.read(size) if size > 0 else b""
        if not data:
            self._finished = True
            self.close()
            raise StopIteration

        if self.remaining is not None:
            self.remaining -= len(data)
        return data

    def close(self):
        """Close the underlying file handle if it supports closing."""
        if hasattr(self.file_handle, "close"):
            self.file_handle.close()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class _RangeLimitedFile:
    """File-like view exposing at most ``length`` bytes of an open binary file.

    ``ClosingFileWrapper`` keeps reading until ``read()`` returns no data.
    Wrapping the real handle in this reader makes "no data" appear right after
    the requested byte range, so the body of a 206 response matches its
    ``Content-Length`` and ``Content-Range`` headers.
    """

    def __init__(self, file_handle, length):
        self._file_handle = file_handle
        self._remaining = max(0, length)

    def read(self, size=-1):
        """Read up to ``size`` bytes without crossing the end of the range."""
        if self._remaining <= 0:
            return b""
        if size is None or size < 0 or size > self._remaining:
            size = self._remaining
        data = self._file_handle.read(size)
        self._remaining -= len(data)
        return data

    def close(self):
        """Close the wrapped file handle."""
        self._file_handle.close()


class PdfStreamView(APIView):
    """
    Streams a PDF file with HTTP range support and explicit file handle management.

    Supports streaming both raw (original) and anonymized PDF files.

    Query Parameters:
        type: 'raw' (default) or 'anonymized' - Selects which PDF file to stream.
              Any other value is logged and treated as 'raw'.

    Range handling:
        A single ``Range: bytes=<first>-[<last>]`` header yields a 206 response
        containing exactly the requested bytes. A header that does not parse,
        or whose last position precedes its first, is ignored and the whole
        file is served with status 200.

    Examples:
        GET /api/media/pdf/1/?type=raw - Stream original raw PDF
        GET /api/media/pdf/1/?type=anonymized - Stream anonymized PDF
    """

    permission_classes = [EnvironmentAwarePermission]

    @xframe_options_exempt
    def get(self, request, pk: int, *args, **kwargs):
        """Stream the raw or anonymized PDF belonging to ``RawPdfFile`` ``pk``.

        Args:
            request: DRF request; reads the ``type`` query parameter and the
                ``Range`` header.
            pk: Primary key of the ``RawPdfFile`` to stream.

        Returns:
            A ``FileResponse`` (200) with the whole file, or a
            ``StreamingHttpResponse`` (206) with the requested byte range.

        Raises:
            Http404: If the record or file is missing or inaccessible, if the
                range start lies beyond the end of the file, or if any
                unexpected error occurs while preparing the response.
        """
        file_type = "raw"  # Bound before the try so the error handler can log it
        try:
            pdf_obj = RawPdfFile.objects.filter(pk=pk).first()
            if not pdf_obj:
                logger.warning(f"PDF not found: ID {pk}")
                raise Http404("PDF not found")

            # Parse query parameters to determine which file to stream
            file_type = request.query_params.get("type", "raw").lower()
            if file_type not in ["raw", "anonymized"]:
                logger.warning(f"Invalid file_type '{file_type}', defaulting to 'raw'")
                file_type = "raw"

            # Determine which file field to use
            if file_type == "raw":
                file_field = pdf_obj.file
                if not file_field:
                    logger.warning(f"No raw PDF file available for PDF ID {pk}")
                    raise Http404("Raw PDF file not available")
            else:  # anonymized
                file_field = pdf_obj.anonymized_file
                if not file_field:
                    logger.warning(
                        f"No anonymized PDF file available for PDF ID {pk}"
                    )
                    raise Http404("Anonymized PDF file not available")

            # Check if file exists on filesystem
            try:
                file_path = file_field.path
                if not os.path.exists(file_path):
                    logger.error(f"PDF file does not exist on filesystem: {file_path}")
                    raise Http404(
                        f"{file_type.capitalize()} PDF file not found on filesystem"
                    )

                file_size = os.path.getsize(file_path)
            except (OSError, AttributeError) as e:
                logger.error(f"Error accessing {file_type} PDF file {pk}: {e}")
                raise Http404(f"{file_type.capitalize()} PDF file not accessible")

            # Generate safe filename
            base_filename = (
                os.path.basename(file_field.name)
                if file_field.name
                else f"document_{pk}.pdf"
            )
            # Case-insensitive so "REPORT.PDF" does not become "REPORT.PDF.pdf".
            if not base_filename.lower().endswith(".pdf"):
                base_filename += ".pdf"

            # Add type indicator to filename for clarity
            if file_type == "anonymized":
                stem, extension = base_filename.rsplit(".", 1)
                safe_filename = f"{stem}_anonymized.{extension}"
            else:
                safe_filename = base_filename

            # Handle Range requests
            range_header = request.headers.get("Range")
            if range_header:
                logger.debug(
                    f"Range request for {file_type} PDF {pk}: {range_header}"
                )
                match = _RANGE_RE.match(range_header)
                if not match:
                    logger.warning(f"Invalid Range header format: {range_header}")
                else:
                    start = int(match.group(1))
                    end = int(match.group(2) or file_size - 1)

                    if match.group(2) and end < start:
                        # A last position before the first makes the header
                        # invalid; ignore it and fall through to the full
                        # file instead of emitting a negative Content-Length.
                        logger.warning(f"Invalid Range header format: {range_header}")
                    else:
                        # Validate range. NOTE(review): RFC 9110 prescribes
                        # 416 for an unsatisfiable range; 404 is kept here to
                        # preserve the endpoint's existing responses.
                        if start >= file_size:
                            logger.warning(
                                f"Invalid range start {start} for file size {file_size}"
                            )
                            raise Http404("Invalid range")

                        end = min(end, file_size - 1)
                        chunk_size = end - start + 1

                        try:
                            file_handle = open(file_path, "rb")
                        except OSError as e:
                            logger.error(
                                f"Error opening {file_type} PDF file for range request: {e}"
                            )
                            raise Http404(f"Error accessing {file_type} PDF file")

                        try:
                            file_handle.seek(start)

                            logger.debug(
                                f"Serving {file_type} PDF {pk} range {start}-{end}/{file_size}"
                            )

                            # Limit the stream to chunk_size bytes so the body
                            # agrees with the Content-Length/Content-Range below.
                            response = StreamingHttpResponse(
                                ClosingFileWrapper(
                                    _RangeLimitedFile(file_handle, chunk_size),
                                    blksize=8192,
                                ),
                                status=206,
                                content_type="application/pdf",
                            )
                            response["Content-Length"] = str(chunk_size)
                            response["Content-Range"] = f"bytes {start}-{end}/{file_size}"
                            response["Accept-Ranges"] = "bytes"
                            response["Content-Disposition"] = (
                                f'inline; filename="{safe_filename}"'
                            )
                        except OSError as e:
                            file_handle.close()
                            logger.error(
                                f"Error opening {file_type} PDF file for range request: {e}"
                            )
                            raise Http404(f"Error accessing {file_type} PDF file")
                        except Exception:
                            # Do not leak the handle when the response could
                            # not be built; the outer handler reports the error.
                            file_handle.close()
                            raise

                        return response

            # Serve entire file using FileResponse (automatically handles file closing)
            logger.debug(f"Serving full {file_type} PDF {pk} ({file_size} bytes)")

            try:
                file_handle = open(file_path, "rb")
                response = FileResponse(file_handle, content_type="application/pdf")
                response["Content-Length"] = str(file_size)
                response["Accept-Ranges"] = "bytes"
                response["Content-Disposition"] = f'inline; filename="{safe_filename}"'

                # FileResponse will take ownership of file_handle and close it after response
                return response
            except OSError as e:
                logger.error(f"Error opening {file_type} PDF file: {e}")
                raise Http404(f"Error accessing {file_type} PDF file")

        except Http404:
            # Deliberate 404s were already logged at the point of failure;
            # let them through with their specific message instead of
            # reporting them as unexpected errors.
            raise
        except Exception as e:
            logger.error(
                f"Unexpected error streaming {file_type} {pk}: {e}",
                exc_info=True,
            )
            raise Http404("Error streaming PDF")
|
endoreg_db/views/pdf/reimport.py
CHANGED
|
@@ -1,19 +1,22 @@
|
|
|
1
|
-
from rest_framework.views import APIView
|
|
2
|
-
from rest_framework.response import Response
|
|
3
|
-
from rest_framework import status
|
|
4
1
|
import logging
|
|
5
|
-
|
|
2
|
+
|
|
6
3
|
from django.db import transaction
|
|
4
|
+
from rest_framework import status
|
|
5
|
+
from rest_framework.response import Response
|
|
6
|
+
from rest_framework.views import APIView
|
|
7
|
+
|
|
7
8
|
from ...models import RawPdfFile, SensitiveMeta
|
|
8
9
|
from ...services.pdf_import import PdfImportService
|
|
10
|
+
|
|
9
11
|
logger = logging.getLogger(__name__)
|
|
10
12
|
|
|
13
|
+
|
|
11
14
|
class PdfReimportView(APIView):
|
|
12
15
|
"""
|
|
13
16
|
API endpoint to re-import a pdf file and regenerate metadata.
|
|
14
17
|
This is useful when OCR failed or metadata is incomplete.
|
|
15
18
|
"""
|
|
16
|
-
|
|
19
|
+
|
|
17
20
|
def __init__(self, **kwargs):
|
|
18
21
|
super().__init__(**kwargs)
|
|
19
22
|
self.pdf_service = PdfImportService()
|
|
@@ -22,140 +25,153 @@ class PdfReimportView(APIView):
|
|
|
22
25
|
"""
|
|
23
26
|
Re-import a pdf file to regenerate SensitiveMeta and other metadata.
|
|
24
27
|
Instead of creating a new pdf, this updates the existing one.
|
|
25
|
-
|
|
28
|
+
|
|
26
29
|
Args:
|
|
27
30
|
request: HTTP request object
|
|
28
31
|
pk: PDF primary key (ID)
|
|
29
32
|
"""
|
|
30
33
|
pdf_id = pk # Align with media framework naming convention
|
|
31
|
-
|
|
34
|
+
|
|
32
35
|
# Validate pdf_id parameter
|
|
33
36
|
if not pdf_id or not isinstance(pdf_id, int):
|
|
34
37
|
return Response(
|
|
35
|
-
{"error": "Invalid PDF ID provided."},
|
|
36
|
-
status=status.HTTP_400_BAD_REQUEST
|
|
38
|
+
{"error": "Invalid PDF ID provided."},
|
|
39
|
+
status=status.HTTP_400_BAD_REQUEST,
|
|
37
40
|
)
|
|
38
41
|
|
|
39
42
|
try:
|
|
40
43
|
pdf = RawPdfFile.objects.get(id=pdf_id)
|
|
41
|
-
logger.info(f"Found PDF {pdf.
|
|
44
|
+
logger.info(f"Found PDF {pdf.pdf_hash} (ID: {pdf_id}) for re-import")
|
|
42
45
|
except RawPdfFile.DoesNotExist:
|
|
43
46
|
logger.warning(f"PDF with ID {pdf_id} not found")
|
|
44
47
|
return Response(
|
|
45
|
-
{"error": f"PDF with ID {pdf_id} not found."},
|
|
46
|
-
status=status.HTTP_404_NOT_FOUND
|
|
48
|
+
{"error": f"PDF with ID {pdf_id} not found."},
|
|
49
|
+
status=status.HTTP_404_NOT_FOUND,
|
|
47
50
|
)
|
|
48
51
|
|
|
52
|
+
# Get raw file path using the model method
|
|
53
|
+
raw_file_path = pdf.get_raw_file_path()
|
|
49
54
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
logger.error(f"Raw file not found on disk: {raw_file_path}")
|
|
55
|
+
if not raw_file_path or not raw_file_path.exists():
|
|
56
|
+
logger.error(
|
|
57
|
+
f"Raw PDF file not found for hash {pdf.pdf_hash}: {raw_file_path}"
|
|
58
|
+
)
|
|
55
59
|
return Response(
|
|
56
|
-
{
|
|
57
|
-
|
|
60
|
+
{
|
|
61
|
+
"error": f"Raw PDF file not found for PDF {pdf.pdf_hash}. Please upload the original file again."
|
|
62
|
+
},
|
|
63
|
+
status=status.HTTP_404_NOT_FOUND,
|
|
58
64
|
)
|
|
59
65
|
|
|
60
66
|
# Check if PDF has required relationships
|
|
61
67
|
if not pdf.center:
|
|
62
|
-
logger.warning(f"PDF {pdf.
|
|
68
|
+
logger.warning(f"PDF {pdf.pdf_hash} has no associated center")
|
|
63
69
|
return Response(
|
|
64
|
-
{"error": "
|
|
65
|
-
status=status.HTTP_400_BAD_REQUEST
|
|
70
|
+
{"error": "PDF has no associated center."},
|
|
71
|
+
status=status.HTTP_400_BAD_REQUEST,
|
|
66
72
|
)
|
|
67
73
|
|
|
68
74
|
try:
|
|
69
|
-
logger.info(f"Starting
|
|
70
|
-
|
|
75
|
+
logger.info(f"Starting re-import for PDF {pdf.pdf_hash} (ID: {pdf_id})")
|
|
76
|
+
|
|
71
77
|
with transaction.atomic():
|
|
72
78
|
# Clear existing metadata to force regeneration
|
|
73
79
|
old_meta_id = None
|
|
74
80
|
if pdf.sensitive_meta:
|
|
75
|
-
old_meta_id = pdf.sensitive_meta.
|
|
76
|
-
logger.info(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
81
|
+
old_meta_id = pdf.sensitive_meta.pk
|
|
82
|
+
logger.info(
|
|
83
|
+
f"Clearing existing SensitiveMeta {old_meta_id} for PDF {pdf.pdf_hash}"
|
|
84
|
+
)
|
|
85
|
+
pdf.sensitive_meta = None # type: ignore
|
|
86
|
+
pdf.save(update_fields=["sensitive_meta"])
|
|
87
|
+
|
|
80
88
|
# Delete the old SensitiveMeta record
|
|
81
89
|
try:
|
|
82
|
-
SensitiveMeta.objects.filter(
|
|
90
|
+
SensitiveMeta.objects.filter(pk=old_meta_id).delete()
|
|
83
91
|
logger.info(f"Deleted old SensitiveMeta {old_meta_id}")
|
|
84
92
|
except Exception as e:
|
|
85
|
-
logger.warning(
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# Ensure minimum patient data is available
|
|
92
|
-
logger.info(f"Ensuring minimum patient data for {pdf.uuid}")
|
|
93
|
-
self.pdf_service._ensure_default_patient_data(pdf)
|
|
94
|
-
|
|
95
|
-
# Refresh from database to get updated data
|
|
96
|
-
pdf.refresh_from_db()
|
|
97
|
-
|
|
98
|
-
# Use VideoImportService for anonymization
|
|
93
|
+
logger.warning(
|
|
94
|
+
f"Could not delete old SensitiveMeta {old_meta_id}: {e}"
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
# Use PdfImportService for reprocessing
|
|
99
98
|
try:
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
logger.info(
|
|
100
|
+
f"Starting reprocessing using PdfImportService for {pdf.pdf_hash}"
|
|
101
|
+
)
|
|
102
102
|
self.pdf_service.import_and_anonymize(
|
|
103
103
|
file_path=raw_file_path,
|
|
104
104
|
center_name=pdf.center.name,
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
delete_source=False
|
|
105
|
+
delete_source=False, # Don't delete during reimport
|
|
106
|
+
retry=True, # Mark as retry attempt
|
|
108
107
|
)
|
|
109
|
-
|
|
110
|
-
logger.info(
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
108
|
+
|
|
109
|
+
logger.info(
|
|
110
|
+
f"PdfImportService reprocessing completed for {pdf.pdf_hash}"
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# Refresh to get updated state
|
|
114
|
+
pdf.refresh_from_db()
|
|
115
|
+
|
|
116
|
+
return Response(
|
|
117
|
+
{
|
|
118
|
+
"message": "PDF re-import completed successfully.",
|
|
119
|
+
"pdf_id": pdf_id,
|
|
120
|
+
"pdf_hash": str(pdf.pdf_hash),
|
|
121
|
+
"sensitive_meta_created": pdf.sensitive_meta is not None,
|
|
122
|
+
"sensitive_meta_id": pdf.sensitive_meta.pk
|
|
123
|
+
if pdf.sensitive_meta
|
|
124
|
+
else None,
|
|
125
|
+
"text_extracted": bool(pdf.text),
|
|
126
|
+
"anonymized": pdf.anonymized,
|
|
127
|
+
"status": "done",
|
|
128
|
+
},
|
|
129
|
+
status=status.HTTP_200_OK,
|
|
130
|
+
)
|
|
131
|
+
|
|
124
132
|
except Exception as e:
|
|
125
|
-
logger.exception(
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
"updated_in_place": True,
|
|
138
|
-
"status": "done"
|
|
139
|
-
}, status=status.HTTP_200_OK)
|
|
133
|
+
logger.exception(
|
|
134
|
+
f"PdfImportService reprocessing failed for PDF {pdf.pdf_hash}: {e}"
|
|
135
|
+
)
|
|
136
|
+
return Response(
|
|
137
|
+
{
|
|
138
|
+
"error": f"Reprocessing failed: {str(e)}",
|
|
139
|
+
"error_type": "processing_error",
|
|
140
|
+
"pdf_id": pdf_id,
|
|
141
|
+
"pdf_hash": str(pdf.pdf_hash),
|
|
142
|
+
},
|
|
143
|
+
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
144
|
+
)
|
|
140
145
|
|
|
141
146
|
except Exception as e:
|
|
142
|
-
logger.error(
|
|
143
|
-
|
|
147
|
+
logger.error(
|
|
148
|
+
f"Failed to re-import PDF {pdf.pdf_hash}: {str(e)}", exc_info=True
|
|
149
|
+
)
|
|
150
|
+
|
|
144
151
|
# Handle specific error types
|
|
145
152
|
error_msg = str(e)
|
|
146
|
-
if any(
|
|
153
|
+
if any(
|
|
154
|
+
phrase in error_msg.lower()
|
|
155
|
+
for phrase in ["insufficient storage", "no space left", "disk full"]
|
|
156
|
+
):
|
|
147
157
|
# Storage error - return specific error message
|
|
148
|
-
return Response(
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
158
|
+
return Response(
|
|
159
|
+
{
|
|
160
|
+
"error": f"Storage error during re-import: {error_msg}",
|
|
161
|
+
"error_type": "storage_error",
|
|
162
|
+
"pdf_id": pdf_id,
|
|
163
|
+
"pdf_hash": str(pdf.pdf_hash),
|
|
164
|
+
},
|
|
165
|
+
status=status.HTTP_507_INSUFFICIENT_STORAGE,
|
|
166
|
+
)
|
|
154
167
|
else:
|
|
155
168
|
# Other errors
|
|
156
|
-
return Response(
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
169
|
+
return Response(
|
|
170
|
+
{
|
|
171
|
+
"error": f"Re-import failed: {error_msg}",
|
|
172
|
+
"error_type": "processing_error",
|
|
173
|
+
"pdf_id": pdf_id,
|
|
174
|
+
"pdf_hash": str(pdf.pdf_hash),
|
|
175
|
+
},
|
|
176
|
+
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
177
|
+
)
|