endoreg-db 0.6.3__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (798)
  1. endoreg_db/admin.py +26 -26
  2. endoreg_db/api_urls.py +4 -0
  3. endoreg_db/apps.py +12 -0
  4. endoreg_db/assets/dummy_model.ckpt +1 -0
  5. endoreg_db/codemods/readme.md +88 -0
  6. endoreg_db/codemods/rename_datetime_fields.py +92 -0
  7. endoreg_db/config/env.py +101 -0
  8. endoreg_db/data/__init__.py +26 -0
  9. endoreg_db/data/ai_model/data.yaml +1 -1
  10. endoreg_db/data/ai_model_label/label/polyp_classification.yaml +52 -0
  11. endoreg_db/data/ai_model_label/label-set/data.yaml +20 -1
  12. endoreg_db/data/ai_model_label/label-set/polyp_classifications.yaml +25 -0
  13. endoreg_db/data/center/data.yaml +13 -12
  14. endoreg_db/data/center_shift/ukw.yaml +9 -0
  15. endoreg_db/data/db_summary.csv +58 -0
  16. endoreg_db/data/db_summary.xlsx +0 -0
  17. endoreg_db/data/disease/misc.yaml +1 -2
  18. endoreg_db/data/disease_classification/chronic_kidney_disease.yaml +2 -2
  19. endoreg_db/data/disease_classification_choice/chronic_kidney_disease.yaml +6 -6
  20. endoreg_db/data/endoscopy_processor/data.yaml +3 -0
  21. endoreg_db/data/event/cardiology.yaml +0 -13
  22. endoreg_db/data/examination/examinations/data.yaml +34 -28
  23. endoreg_db/data/examination/type/data.yaml +12 -0
  24. endoreg_db/data/examination_indication/endoscopy.yaml +418 -2
  25. endoreg_db/data/examination_indication_classification/endoscopy.yaml +157 -5
  26. endoreg_db/data/examination_requirement_set/colonoscopy.yaml +15 -0
  27. endoreg_db/data/finding/anatomy_colon.yaml +128 -0
  28. endoreg_db/data/finding/colonoscopy.yaml +40 -0
  29. endoreg_db/data/finding/colonoscopy_bowel_prep.yaml +56 -0
  30. endoreg_db/data/finding/complication.yaml +16 -0
  31. endoreg_db/data/finding/data.yaml +8 -44
  32. endoreg_db/data/finding/examination_setting.yaml +16 -0
  33. endoreg_db/data/finding/medication_related.yaml +18 -0
  34. endoreg_db/data/finding/outcome.yaml +12 -0
  35. endoreg_db/data/finding_classification/colonoscopy_bowel_preparation.yaml +95 -0
  36. endoreg_db/data/finding_classification/colonoscopy_jnet.yaml +22 -0
  37. endoreg_db/data/finding_classification/colonoscopy_kudo.yaml +25 -0
  38. endoreg_db/data/finding_classification/colonoscopy_lesion_circularity.yaml +20 -0
  39. endoreg_db/data/finding_classification/colonoscopy_lesion_planarity.yaml +24 -0
  40. endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +68 -0
  41. endoreg_db/data/finding_classification/colonoscopy_lesion_surface.yaml +20 -0
  42. endoreg_db/data/finding_classification/colonoscopy_location.yaml +80 -0
  43. endoreg_db/data/finding_classification/colonoscopy_lst.yaml +21 -0
  44. endoreg_db/data/finding_classification/colonoscopy_nice.yaml +20 -0
  45. endoreg_db/data/finding_classification/colonoscopy_paris.yaml +26 -0
  46. endoreg_db/data/finding_classification/colonoscopy_sano.yaml +22 -0
  47. endoreg_db/data/finding_classification/colonoscopy_summary.yaml +53 -0
  48. endoreg_db/data/finding_classification/complication_generic.yaml +25 -0
  49. endoreg_db/data/finding_classification/examination_setting_generic.yaml +40 -0
  50. endoreg_db/data/finding_classification/histology_colo.yaml +51 -0
  51. endoreg_db/data/finding_classification/intervention_required.yaml +26 -0
  52. endoreg_db/data/finding_classification/medication_related.yaml +23 -0
  53. endoreg_db/data/finding_classification/visualized.yaml +33 -0
  54. endoreg_db/data/finding_classification_choice/bowel_preparation.yaml +78 -0
  55. endoreg_db/data/{finding_morphology_classification_choice → finding_classification_choice}/colon_lesion_circularity_default.yaml +0 -2
  56. endoreg_db/data/finding_classification_choice/colon_lesion_jnet.yaml +15 -0
  57. endoreg_db/data/finding_classification_choice/colon_lesion_kudo.yaml +23 -0
  58. endoreg_db/data/finding_classification_choice/colon_lesion_lst.yaml +15 -0
  59. endoreg_db/data/{finding_morphology_classification_choice → finding_classification_choice}/colon_lesion_nice.yaml +4 -7
  60. endoreg_db/data/{finding_morphology_classification_choice → finding_classification_choice}/colon_lesion_paris.yaml +0 -8
  61. endoreg_db/data/{finding_morphology_classification_choice → finding_classification_choice}/colon_lesion_planarity_default.yaml +6 -13
  62. endoreg_db/data/finding_classification_choice/colon_lesion_sano.yaml +14 -0
  63. endoreg_db/data/{finding_morphology_classification_choice → finding_classification_choice}/colon_lesion_surface_intact_default.yaml +3 -6
  64. endoreg_db/data/{finding_location_classification_choice/colonoscopy.yaml → finding_classification_choice/colonoscopy_location.yaml} +11 -22
  65. endoreg_db/data/finding_classification_choice/colonoscopy_not_complete_reason.yaml +19 -0
  66. endoreg_db/data/finding_classification_choice/colonoscopy_size.yaml +82 -0
  67. endoreg_db/data/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +15 -0
  68. endoreg_db/data/finding_classification_choice/complication_generic_types.yaml +15 -0
  69. endoreg_db/data/finding_classification_choice/examination_setting_generic_types.yaml +15 -0
  70. endoreg_db/data/finding_classification_choice/histology.yaml +24 -0
  71. endoreg_db/data/finding_classification_choice/histology_polyp.yaml +20 -0
  72. endoreg_db/data/finding_classification_choice/outcome.yaml +19 -0
  73. endoreg_db/data/finding_classification_choice/yes_no_na.yaml +11 -0
  74. endoreg_db/data/finding_classification_type/colonoscopy_basic.yaml +48 -0
  75. endoreg_db/data/finding_intervention/endoscopy.yaml +26 -121
  76. endoreg_db/data/finding_intervention/endoscopy_colonoscopy.yaml +168 -0
  77. endoreg_db/data/finding_intervention/endoscopy_egd.yaml +128 -0
  78. endoreg_db/data/finding_intervention/endoscopy_ercp.yaml +32 -0
  79. endoreg_db/data/finding_intervention/endoscopy_eus_lower.yaml +9 -0
  80. endoreg_db/data/finding_intervention/endoscopy_eus_upper.yaml +36 -0
  81. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +6 -6
  82. endoreg_db/data/finding_type/data.yaml +23 -10
  83. endoreg_db/data/gender/data.yaml +8 -1
  84. endoreg_db/data/information_source/annotation.yaml +6 -0
  85. endoreg_db/data/information_source/endoscopy_guidelines.yaml +7 -0
  86. endoreg_db/data/information_source/prediction.yaml +7 -0
  87. endoreg_db/data/information_source_type/data.yaml +8 -0
  88. endoreg_db/data/lab_value/misc.yaml +43 -0
  89. endoreg_db/data/medication/anticoagulation.yaml +5 -5
  90. endoreg_db/data/medication/tah.yaml +5 -5
  91. endoreg_db/data/medication_indication/anticoagulation.yaml +4 -4
  92. endoreg_db/data/medication_intake_time/base.yaml +4 -4
  93. endoreg_db/data/names_first/first_names.yaml +3 -0
  94. endoreg_db/data/pdf_type/data.yaml +27 -10
  95. endoreg_db/data/qualification/endoscopy.yaml +36 -0
  96. endoreg_db/data/qualification/m2.yaml +39 -0
  97. endoreg_db/data/qualification/outpatient_clinic.yaml +35 -0
  98. endoreg_db/data/qualification/sonography.yaml +36 -0
  99. endoreg_db/data/qualification_type/base.yaml +29 -0
  100. endoreg_db/data/report_reader_flag/rkh-histology-generic.yaml +10 -0
  101. endoreg_db/data/report_reader_flag/ukw-histology-generic.yaml +5 -0
  102. endoreg_db/data/requirement/age.yaml +26 -0
  103. endoreg_db/data/requirement/colonoscopy_baseline_austria.yaml +45 -0
  104. endoreg_db/data/requirement/disease_cardiovascular.yaml +79 -0
  105. endoreg_db/data/requirement/disease_classification_choice_cardiovascular.yaml +41 -0
  106. endoreg_db/data/requirement/disease_hepatology.yaml +12 -0
  107. endoreg_db/data/requirement/disease_misc.yaml +12 -0
  108. endoreg_db/data/requirement/disease_renal.yaml +96 -0
  109. endoreg_db/data/requirement/endoscopy_bleeding_risk.yaml +59 -0
  110. endoreg_db/data/requirement/event_cardiology.yaml +251 -0
  111. endoreg_db/data/requirement/event_requirements.yaml +145 -0
  112. endoreg_db/data/requirement/finding_colon_polyp.yaml +50 -0
  113. endoreg_db/data/requirement/gender.yaml +25 -0
  114. endoreg_db/data/requirement/lab_value.yaml +441 -0
  115. endoreg_db/data/requirement/medication.yaml +93 -0
  116. endoreg_db/data/requirement_operator/age.yaml +13 -0
  117. endoreg_db/data/requirement_operator/lab_operators.yaml +129 -0
  118. endoreg_db/data/requirement_operator/model_operators.yaml +96 -0
  119. endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +48 -0
  120. endoreg_db/data/requirement_set/colonoscopy_austria_screening.yaml +57 -0
  121. endoreg_db/data/requirement_set/endoscopy_bleeding_risk.yaml +52 -0
  122. endoreg_db/data/requirement_set_type/data.yaml +20 -0
  123. endoreg_db/data/requirement_type/requirement_types.yaml +165 -0
  124. endoreg_db/data/risk/bleeding.yaml +26 -0
  125. endoreg_db/data/risk/thrombosis.yaml +37 -0
  126. endoreg_db/data/risk_type/data.yaml +27 -0
  127. endoreg_db/data/shift/endoscopy.yaml +21 -0
  128. endoreg_db/data/shift_type/base.yaml +35 -0
  129. endoreg_db/data/tag/requirement_set_tags.yaml +11 -0
  130. endoreg_db/data/unit/concentration.yaml +23 -0
  131. endoreg_db/data/unit/time.yaml +36 -1
  132. endoreg_db/exceptions.py +19 -0
  133. endoreg_db/forms/patient_finding_intervention_form.py +4 -5
  134. endoreg_db/forms/patient_form.py +7 -6
  135. endoreg_db/forms/questionnaires/__init__.py +1 -1
  136. endoreg_db/forms/questionnaires/tto_questionnaire.py +19 -19
  137. endoreg_db/helpers/count_db.py +45 -0
  138. endoreg_db/helpers/data_loader.py +208 -0
  139. endoreg_db/helpers/default_objects.py +359 -0
  140. endoreg_db/helpers/interact.py +6 -0
  141. endoreg_db/helpers/test_video_helper.py +119 -0
  142. endoreg_db/logger_conf.py +140 -0
  143. endoreg_db/management/__init__.py +1 -0
  144. endoreg_db/management/commands/__init__.py +1 -0
  145. endoreg_db/management/commands/anonymize_video.py +0 -0
  146. endoreg_db/management/commands/check_auth.py +125 -0
  147. endoreg_db/management/commands/create_multilabel_model_meta.py +214 -0
  148. endoreg_db/management/commands/fix_missing_patient_data.py +172 -0
  149. endoreg_db/management/commands/fix_video_paths.py +165 -0
  150. endoreg_db/management/commands/import_fallback_video.py +203 -0
  151. endoreg_db/management/commands/import_report.py +298 -0
  152. endoreg_db/management/commands/import_video.py +422 -0
  153. endoreg_db/management/commands/import_video_with_classification.py +367 -0
  154. endoreg_db/management/commands/init_default_ai_model.py +112 -0
  155. endoreg_db/management/commands/load_ai_model_data.py +2 -7
  156. endoreg_db/management/commands/load_base_db_data.py +15 -1
  157. endoreg_db/management/commands/load_center_data.py +46 -21
  158. endoreg_db/management/commands/load_endoscope_data.py +2 -2
  159. endoreg_db/management/commands/load_examination_indication_data.py +49 -28
  160. endoreg_db/management/commands/load_finding_data.py +49 -92
  161. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +0 -1
  162. endoreg_db/management/commands/load_information_source.py +13 -7
  163. endoreg_db/management/commands/load_qualification_data.py +59 -0
  164. endoreg_db/management/commands/load_requirement_data.py +180 -0
  165. endoreg_db/management/commands/load_risk_data.py +56 -0
  166. endoreg_db/management/commands/load_shift_data.py +60 -0
  167. endoreg_db/management/commands/load_tag_data.py +57 -0
  168. endoreg_db/management/commands/register_ai_model.py +1 -1
  169. endoreg_db/management/commands/start_filewatcher.py +106 -0
  170. endoreg_db/management/commands/storage_management.py +548 -0
  171. endoreg_db/management/commands/summarize_db_content.py +189 -0
  172. endoreg_db/management/commands/validate_video.py +204 -0
  173. endoreg_db/management/commands/validate_video_files.py +161 -0
  174. endoreg_db/management/commands/video_validation.py +22 -0
  175. endoreg_db/migrations/0001_initial.py +625 -813
  176. endoreg_db/migrations/0002_add_video_correction_models.py +52 -0
  177. endoreg_db/models/__init__.py +274 -291
  178. endoreg_db/models/administration/__init__.py +116 -0
  179. endoreg_db/models/{ai_model → administration/ai}/__init__.py +6 -1
  180. endoreg_db/models/administration/ai/active_model.py +35 -0
  181. endoreg_db/models/administration/ai/ai_model.py +156 -0
  182. endoreg_db/models/{ai_model → administration/ai}/model_type.py +6 -1
  183. endoreg_db/models/administration/case/__init__.py +19 -0
  184. endoreg_db/models/administration/case/case.py +114 -0
  185. endoreg_db/models/{case_template → administration/case/case_template}/case_template.py +3 -3
  186. endoreg_db/models/{case_template → administration/case/case_template}/case_template_rule.py +3 -10
  187. endoreg_db/models/{case_template → administration/case/case_template}/case_template_rule_value.py +2 -4
  188. endoreg_db/models/{case_template → administration/case/case_template}/case_template_type.py +1 -3
  189. endoreg_db/models/{center → administration/center}/__init__.py +3 -1
  190. endoreg_db/models/administration/center/center.py +61 -0
  191. endoreg_db/models/administration/center/center_product.py +64 -0
  192. endoreg_db/models/{center → administration/center}/center_resource.py +19 -3
  193. endoreg_db/models/administration/center/center_shift.py +88 -0
  194. endoreg_db/models/administration/center/center_waste.py +30 -0
  195. endoreg_db/models/administration/permissions/__init__.py +44 -0
  196. endoreg_db/models/administration/person/__init__.py +24 -0
  197. endoreg_db/models/administration/person/employee/__init__.py +3 -0
  198. endoreg_db/models/administration/person/employee/employee.py +35 -0
  199. endoreg_db/models/administration/person/employee/employee_qualification.py +39 -0
  200. endoreg_db/models/administration/person/employee/employee_type.py +42 -0
  201. endoreg_db/models/administration/person/examiner/__init__.py +4 -0
  202. endoreg_db/models/administration/person/examiner/examiner.py +54 -0
  203. endoreg_db/models/administration/person/names/__init__.py +0 -0
  204. endoreg_db/models/{persons → administration/person/names}/first_name.py +1 -1
  205. endoreg_db/models/{persons → administration/person/names}/last_name.py +2 -3
  206. endoreg_db/models/administration/person/patient/__init__.py +5 -0
  207. endoreg_db/models/administration/person/patient/patient.py +460 -0
  208. endoreg_db/models/administration/person/profession/__init__.py +24 -0
  209. endoreg_db/models/administration/person/user/__init__.py +5 -0
  210. endoreg_db/models/administration/person/user/portal_user_information.py +37 -0
  211. endoreg_db/models/administration/product/product.py +97 -0
  212. endoreg_db/models/administration/product/product_group.py +39 -0
  213. endoreg_db/models/administration/product/product_material.py +54 -0
  214. endoreg_db/models/{product → administration/product}/product_weight.py +9 -0
  215. endoreg_db/models/{product → administration/product}/reference_product.py +26 -11
  216. endoreg_db/models/administration/qualification/__init__.py +7 -0
  217. endoreg_db/models/administration/qualification/qualification.py +37 -0
  218. endoreg_db/models/administration/qualification/qualification_type.py +35 -0
  219. endoreg_db/models/administration/shift/__init__.py +9 -0
  220. endoreg_db/models/administration/shift/scheduled_days.py +69 -0
  221. endoreg_db/models/administration/shift/shift.py +51 -0
  222. endoreg_db/models/administration/shift/shift_type.py +108 -0
  223. endoreg_db/models/label/__init__.py +24 -1
  224. endoreg_db/models/label/annotation/__init__.py +12 -0
  225. endoreg_db/models/label/annotation/image_classification.py +84 -0
  226. endoreg_db/models/label/annotation/video_segmentation_annotation.py +66 -0
  227. endoreg_db/models/label/label.py +45 -74
  228. endoreg_db/models/label/label_set.py +53 -0
  229. endoreg_db/models/label/label_type.py +29 -0
  230. endoreg_db/models/label/label_video_segment/__init__.py +3 -0
  231. endoreg_db/models/label/label_video_segment/_create_from_video.py +41 -0
  232. endoreg_db/models/label/label_video_segment/label_video_segment.py +511 -0
  233. endoreg_db/models/label/video_segmentation_label.py +31 -0
  234. endoreg_db/models/{annotation → label}/video_segmentation_labelset.py +7 -0
  235. endoreg_db/models/media/__init__.py +14 -0
  236. endoreg_db/models/media/frame/__init__.py +3 -0
  237. endoreg_db/models/media/frame/frame.py +111 -0
  238. endoreg_db/models/media/pdf/__init__.py +11 -0
  239. endoreg_db/models/media/pdf/raw_pdf.py +608 -0
  240. endoreg_db/models/media/pdf/report_file.py +162 -0
  241. endoreg_db/models/media/pdf/report_reader/report_reader_config.py +77 -0
  242. endoreg_db/models/media/video/__init__.py +4 -0
  243. endoreg_db/models/media/video/create_from_file.py +336 -0
  244. endoreg_db/models/media/video/pipe_1.py +195 -0
  245. endoreg_db/models/media/video/pipe_2.py +105 -0
  246. endoreg_db/models/media/video/refactor_plan.md +0 -0
  247. endoreg_db/models/media/video/video_file.py +680 -0
  248. endoreg_db/models/media/video/video_file_ai.py +443 -0
  249. endoreg_db/models/media/video/video_file_anonymize.py +348 -0
  250. endoreg_db/models/media/video/video_file_frames/__init__.py +47 -0
  251. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +22 -0
  252. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +23 -0
  253. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +104 -0
  254. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +174 -0
  255. endoreg_db/models/media/video/video_file_frames/_get_frame.py +28 -0
  256. endoreg_db/models/media/video/video_file_frames/_get_frame_number.py +27 -0
  257. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +20 -0
  258. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +27 -0
  259. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +34 -0
  260. endoreg_db/models/media/video/video_file_frames/_get_frames.py +27 -0
  261. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +129 -0
  262. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +129 -0
  263. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +65 -0
  264. endoreg_db/models/media/video/video_file_frames.py +0 -0
  265. endoreg_db/models/media/video/video_file_io.py +166 -0
  266. endoreg_db/models/media/video/video_file_meta/__init__.py +22 -0
  267. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +45 -0
  268. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +39 -0
  269. endoreg_db/models/media/video/video_file_meta/get_fps.py +147 -0
  270. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +143 -0
  271. endoreg_db/models/media/video/video_file_meta/text_meta.py +134 -0
  272. endoreg_db/models/media/video/video_file_meta/video_meta.py +70 -0
  273. endoreg_db/models/media/video/video_file_meta.py +11 -0
  274. endoreg_db/models/media/video/video_file_segments.py +209 -0
  275. endoreg_db/models/medical/__init__.py +146 -0
  276. endoreg_db/models/{contraindication → medical/contraindication}/__init__.py +1 -5
  277. endoreg_db/models/medical/disease.py +156 -0
  278. endoreg_db/models/medical/event.py +137 -0
  279. endoreg_db/models/{examination → medical/examination}/__init__.py +1 -1
  280. endoreg_db/models/medical/examination/examination.py +148 -0
  281. endoreg_db/models/medical/examination/examination_indication.py +278 -0
  282. endoreg_db/models/{examination → medical/examination}/examination_time.py +0 -4
  283. endoreg_db/models/{examination → medical/examination}/examination_time_type.py +1 -8
  284. endoreg_db/models/{examination → medical/examination}/examination_type.py +18 -10
  285. endoreg_db/models/medical/finding/__init__.py +18 -0
  286. endoreg_db/models/medical/finding/finding.py +96 -0
  287. endoreg_db/models/medical/finding/finding_classification.py +142 -0
  288. endoreg_db/models/{finding → medical/finding}/finding_intervention.py +2 -10
  289. endoreg_db/models/medical/finding/finding_type.py +35 -0
  290. endoreg_db/models/medical/hardware/__init__.py +8 -0
  291. endoreg_db/models/{hardware → medical/hardware}/endoscope.py +28 -23
  292. endoreg_db/models/medical/laboratory/__init__.py +5 -0
  293. endoreg_db/models/medical/laboratory/lab_value.py +419 -0
  294. endoreg_db/models/{medication → medical/medication}/medication.py +1 -3
  295. endoreg_db/models/{medication → medical/medication}/medication_indication_type.py +8 -3
  296. endoreg_db/models/{medication → medical/medication}/medication_intake_time.py +21 -3
  297. endoreg_db/models/{medication → medical/medication}/medication_schedule.py +13 -5
  298. endoreg_db/models/{organ → medical/organ}/__init__.py +3 -6
  299. endoreg_db/models/medical/patient/__init__.py +56 -0
  300. endoreg_db/models/medical/patient/medication_examples.py +38 -0
  301. endoreg_db/models/medical/patient/patient_disease.py +63 -0
  302. endoreg_db/models/medical/patient/patient_event.py +75 -0
  303. endoreg_db/models/medical/patient/patient_examination.py +249 -0
  304. endoreg_db/models/{persons → medical}/patient/patient_examination_indication.py +21 -9
  305. endoreg_db/models/medical/patient/patient_finding.py +357 -0
  306. endoreg_db/models/medical/patient/patient_finding_classification.py +207 -0
  307. endoreg_db/models/{patient → medical/patient}/patient_finding_intervention.py +15 -1
  308. endoreg_db/models/medical/patient/patient_lab_sample.py +148 -0
  309. endoreg_db/models/{persons → medical}/patient/patient_lab_value.py +40 -15
  310. endoreg_db/models/medical/patient/patient_medication.py +104 -0
  311. endoreg_db/models/medical/patient/patient_medication_schedule.py +136 -0
  312. endoreg_db/models/medical/risk/__init__.py +7 -0
  313. endoreg_db/models/medical/risk/risk.py +72 -0
  314. endoreg_db/models/medical/risk/risk_type.py +51 -0
  315. endoreg_db/models/{data_file/metadata → metadata}/__init__.py +6 -0
  316. endoreg_db/models/metadata/frame_ocr_result.py +0 -0
  317. endoreg_db/models/metadata/model_meta.py +193 -0
  318. endoreg_db/models/metadata/model_meta_logic.py +236 -0
  319. endoreg_db/models/{data_file/metadata → metadata}/pdf_meta.py +28 -13
  320. endoreg_db/models/metadata/sensitive_meta.py +288 -0
  321. endoreg_db/models/metadata/sensitive_meta_logic.py +643 -0
  322. endoreg_db/models/metadata/video_meta.py +332 -0
  323. endoreg_db/models/metadata/video_prediction_logic.py +190 -0
  324. endoreg_db/models/metadata/video_prediction_meta.py +270 -0
  325. endoreg_db/models/other/__init__.py +17 -0
  326. endoreg_db/models/other/distribution/date_value_distribution.py +0 -2
  327. endoreg_db/models/other/distribution/numeric_value_distribution.py +30 -2
  328. endoreg_db/models/{emission → other/emission}/emission_factor.py +15 -6
  329. endoreg_db/models/{persons → other}/gender.py +8 -3
  330. endoreg_db/models/other/information_source.py +159 -0
  331. endoreg_db/models/other/material.py +10 -2
  332. endoreg_db/models/other/resource.py +6 -2
  333. endoreg_db/models/other/tag.py +27 -0
  334. endoreg_db/models/other/transport_route.py +13 -2
  335. endoreg_db/models/{unit.py → other/unit.py} +16 -6
  336. endoreg_db/models/other/waste.py +10 -3
  337. endoreg_db/models/requirement/__init__.py +11 -0
  338. endoreg_db/models/requirement/requirement.py +767 -0
  339. endoreg_db/models/requirement/requirement_evaluation/__init__.py +6 -0
  340. endoreg_db/models/requirement/requirement_evaluation/get_values.py +40 -0
  341. endoreg_db/models/requirement/requirement_evaluation/operator_evaluation_models.py +9 -0
  342. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +95 -0
  343. endoreg_db/models/requirement/requirement_operator.py +176 -0
  344. endoreg_db/models/requirement/requirement_set.py +287 -0
  345. endoreg_db/models/rule/__init__.py +13 -0
  346. endoreg_db/models/{rules → rule}/rule.py +6 -3
  347. endoreg_db/models/{rules → rule}/rule_attribute_dtype.py +0 -2
  348. endoreg_db/models/{rules → rule}/rule_type.py +0 -2
  349. endoreg_db/models/{rules → rule}/ruleset.py +0 -2
  350. endoreg_db/models/state/__init__.py +12 -0
  351. endoreg_db/models/state/abstract.py +11 -0
  352. endoreg_db/models/state/audit_ledger.py +150 -0
  353. endoreg_db/models/state/label_video_segment.py +22 -0
  354. endoreg_db/models/state/raw_pdf.py +187 -0
  355. endoreg_db/models/state/sensitive_meta.py +46 -0
  356. endoreg_db/models/state/video.py +232 -0
  357. endoreg_db/models/upload_job.py +99 -0
  358. endoreg_db/models/utils.py +135 -0
  359. endoreg_db/models/video_metadata.py +66 -0
  360. endoreg_db/models/video_processing.py +153 -0
  361. endoreg_db/renames.yml +8 -0
  362. endoreg_db/root_urls.py +9 -0
  363. endoreg_db/schemas/__init__.py +0 -0
  364. endoreg_db/schemas/examination_evaluation.py +27 -0
  365. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +775 -0
  366. endoreg_db/serializers/__init__.py +147 -10
  367. endoreg_db/serializers/{raw_pdf_meta_validation.py → _old/raw_pdf_meta_validation.py} +3 -3
  368. endoreg_db/serializers/{raw_video_meta_validation.py → _old/raw_video_meta_validation.py} +18 -14
  369. endoreg_db/serializers/_old/video.py +71 -0
  370. endoreg_db/serializers/administration/__init__.py +14 -0
  371. endoreg_db/serializers/administration/ai/__init__.py +10 -0
  372. endoreg_db/serializers/administration/ai/active_model.py +10 -0
  373. endoreg_db/serializers/administration/ai/ai_model.py +18 -0
  374. endoreg_db/serializers/administration/ai/model_type.py +10 -0
  375. endoreg_db/serializers/administration/center.py +9 -0
  376. endoreg_db/serializers/administration/gender.py +9 -0
  377. endoreg_db/serializers/anonymization.py +66 -0
  378. endoreg_db/serializers/evaluation/examination_evaluation.py +1 -0
  379. endoreg_db/serializers/examination/__init__.py +10 -0
  380. endoreg_db/serializers/examination/base.py +46 -0
  381. endoreg_db/serializers/examination/dropdown.py +21 -0
  382. endoreg_db/serializers/examination_serializer.py +12 -0
  383. endoreg_db/serializers/finding/__init__.py +5 -0
  384. endoreg_db/serializers/finding/finding.py +54 -0
  385. endoreg_db/serializers/finding_classification/__init__.py +7 -0
  386. endoreg_db/serializers/finding_classification/choice.py +19 -0
  387. endoreg_db/serializers/finding_classification/classification.py +13 -0
  388. endoreg_db/serializers/label/__init__.py +7 -0
  389. endoreg_db/serializers/label/image_classification_annotation.py +62 -0
  390. endoreg_db/serializers/label/label.py +15 -0
  391. endoreg_db/serializers/label_video_segment/__init__.py +7 -0
  392. endoreg_db/serializers/label_video_segment/_lvs_create.py +149 -0
  393. endoreg_db/serializers/label_video_segment/_lvs_update.py +138 -0
  394. endoreg_db/serializers/label_video_segment/_lvs_validate.py +149 -0
  395. endoreg_db/serializers/label_video_segment/label_video_segment.py +344 -0
  396. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +99 -0
  397. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +163 -0
  398. endoreg_db/serializers/meta/__init__.py +19 -0
  399. endoreg_db/serializers/meta/pdf_file_meta_extraction.py +115 -0
  400. endoreg_db/serializers/meta/report_meta.py +53 -0
  401. endoreg_db/serializers/meta/sensitive_meta_detail.py +162 -0
  402. endoreg_db/serializers/meta/sensitive_meta_update.py +148 -0
  403. endoreg_db/serializers/meta/sensitive_meta_verification.py +59 -0
  404. endoreg_db/serializers/meta/video_meta.py +39 -0
  405. endoreg_db/serializers/misc/__init__.py +14 -0
  406. endoreg_db/serializers/misc/file_overview.py +152 -0
  407. endoreg_db/serializers/misc/stats.py +33 -0
  408. endoreg_db/serializers/misc/translatable_field_mix_in.py +44 -0
  409. endoreg_db/serializers/misc/upload_job.py +71 -0
  410. endoreg_db/serializers/misc/vop_patient_data.py +120 -0
  411. endoreg_db/serializers/patient/__init__.py +11 -0
  412. endoreg_db/serializers/patient/patient.py +86 -0
  413. endoreg_db/serializers/patient/patient_dropdown.py +27 -0
  414. endoreg_db/serializers/patient_examination/__init__.py +7 -0
  415. endoreg_db/serializers/patient_examination/patient_examination.py +141 -0
  416. endoreg_db/serializers/patient_finding/__init__.py +15 -0
  417. endoreg_db/serializers/patient_finding/patient_finding.py +31 -0
  418. endoreg_db/serializers/patient_finding/patient_finding_classification.py +39 -0
  419. endoreg_db/serializers/patient_finding/patient_finding_detail.py +53 -0
  420. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +26 -0
  421. endoreg_db/serializers/patient_finding/patient_finding_list.py +41 -0
  422. endoreg_db/serializers/patient_finding/patient_finding_write.py +126 -0
  423. endoreg_db/serializers/pdf/__init__.py +5 -0
  424. endoreg_db/serializers/pdf/anony_text_validation.py +85 -0
  425. endoreg_db/serializers/report/__init__.py +9 -0
  426. endoreg_db/serializers/report/mixins.py +45 -0
  427. endoreg_db/serializers/report/report.py +105 -0
  428. endoreg_db/serializers/report/report_list.py +22 -0
  429. endoreg_db/serializers/report/secure_file_url.py +26 -0
  430. endoreg_db/serializers/requirements/requirement_schema.py +25 -0
  431. endoreg_db/serializers/requirements/requirement_sets.py +29 -0
  432. endoreg_db/serializers/sensitive_meta_serializer.py +282 -0
  433. endoreg_db/serializers/video/__init__.py +7 -0
  434. endoreg_db/serializers/video/segmentation.py +263 -0
  435. endoreg_db/serializers/video/video_file_brief.py +10 -0
  436. endoreg_db/serializers/video/video_file_detail.py +83 -0
  437. endoreg_db/serializers/video/video_file_list.py +67 -0
  438. endoreg_db/serializers/video/video_metadata.py +105 -0
  439. endoreg_db/serializers/video/video_processing_history.py +153 -0
  440. endoreg_db/services/__init__.py +5 -0
  441. endoreg_db/services/anonymization.py +223 -0
  442. endoreg_db/services/examination_evaluation.py +149 -0
  443. endoreg_db/services/finding_description_service.py +0 -0
  444. endoreg_db/services/lookup_service.py +241 -0
  445. endoreg_db/services/lookup_store.py +122 -0
  446. endoreg_db/services/ollama_api_docs.py +1528 -0
  447. endoreg_db/services/pdf_import.py +993 -0
  448. endoreg_db/services/polling_coordinator.py +288 -0
  449. endoreg_db/services/pseudonym_service.py +89 -0
  450. endoreg_db/services/requirements_object.py +147 -0
  451. endoreg_db/services/segment_sync.py +155 -0
  452. endoreg_db/services/storage_aware_video_processor.py +344 -0
  453. endoreg_db/services/video_import.py +915 -0
  454. endoreg_db/tasks/upload_tasks.py +207 -0
  455. endoreg_db/tasks/video_ingest.py +157 -0
  456. endoreg_db/tasks/video_processing_tasks.py +327 -0
  457. endoreg_db/urls/__init__.py +72 -0
  458. endoreg_db/urls/anonymization.py +32 -0
  459. endoreg_db/urls/auth.py +16 -0
  460. endoreg_db/urls/classification.py +39 -0
  461. endoreg_db/urls/examination.py +54 -0
  462. endoreg_db/urls/files.py +6 -0
  463. endoreg_db/urls/label_video_segment_validate.py +33 -0
  464. endoreg_db/urls/label_video_segments.py +44 -0
  465. endoreg_db/urls/media.py +32 -0
  466. endoreg_db/urls/patient.py +19 -0
  467. endoreg_db/urls/pdf.py +0 -0
  468. endoreg_db/urls/report.py +78 -0
  469. endoreg_db/urls/requirements.py +13 -0
  470. endoreg_db/urls/sensitive_meta.py +36 -0
  471. endoreg_db/urls/stats.py +46 -0
  472. endoreg_db/urls/upload.py +20 -0
  473. endoreg_db/urls/video.py +119 -0
  474. endoreg_db/urls.py +6 -269
  475. endoreg_db/utils/__init__.py +68 -16
  476. endoreg_db/utils/ai/__init__.py +9 -0
  477. endoreg_db/{models/ai_model/utils.py → utils/ai/get.py} +1 -4
  478. endoreg_db/{models/ai_model/lightning → utils/ai}/inference_dataset.py +0 -1
  479. endoreg_db/{models/ai_model/lightning → utils/ai}/multilabel_classification_net.py +14 -10
  480. endoreg_db/{models/ai_model/lightning → utils/ai}/postprocess.py +15 -5
  481. endoreg_db/utils/ai/predict.py +291 -0
  482. endoreg_db/{models/ai_model/lightning → utils/ai}/preprocess.py +1 -1
  483. endoreg_db/utils/calc_duration_seconds.py +24 -0
  484. endoreg_db/utils/case_generator/__init__.py +0 -0
  485. endoreg_db/utils/check_video_files.py +148 -0
  486. endoreg_db/utils/dataloader.py +88 -31
  487. endoreg_db/utils/dates.py +21 -0
  488. endoreg_db/utils/env.py +33 -0
  489. endoreg_db/utils/extract_specific_frames.py +72 -0
  490. endoreg_db/utils/file_operations.py +29 -1
  491. endoreg_db/utils/fix_video_path_direct.py +141 -0
  492. endoreg_db/utils/frame_anonymization_utils.py +463 -0
  493. endoreg_db/utils/hashs.py +1 -0
  494. endoreg_db/utils/links/__init__.py +0 -0
  495. endoreg_db/utils/links/requirement_link.py +193 -0
  496. endoreg_db/utils/mime_types.py +0 -0
  497. endoreg_db/utils/names.py +2 -0
  498. endoreg_db/utils/paths.py +104 -0
  499. endoreg_db/utils/permissions.py +143 -0
  500. endoreg_db/utils/pipelines/Readme.md +235 -0
  501. endoreg_db/utils/pipelines/__init__.py +0 -0
  502. endoreg_db/utils/pipelines/process_video_dir.py +120 -0
  503. endoreg_db/utils/product/__init__.py +0 -0
  504. endoreg_db/utils/product/sum_emissions.py +20 -0
  505. endoreg_db/utils/product/sum_weights.py +18 -0
  506. endoreg_db/utils/pydantic_models/db_config.py +1 -1
  507. endoreg_db/utils/requirement_helpers.py +0 -0
  508. endoreg_db/utils/requirement_operator_logic/__init__.py +0 -0
  509. endoreg_db/utils/requirement_operator_logic/lab_value_operators.py +578 -0
  510. endoreg_db/utils/requirement_operator_logic/model_evaluators.py +368 -0
  511. endoreg_db/utils/translation.py +27 -0
  512. endoreg_db/utils/validate_video_detailed.py +357 -0
  513. endoreg_db/utils/video/__init__.py +19 -6
  514. endoreg_db/utils/video/extract_frames.py +37 -70
  515. endoreg_db/utils/video/ffmpeg_wrapper.py +772 -0
  516. endoreg_db/utils/video/names.py +42 -0
  517. endoreg_db/utils/video/streaming_processor.py +312 -0
  518. endoreg_db/utils/video/video_splitter.py +94 -0
  519. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +238 -0
  520. endoreg_db/views/__init__.py +282 -2
  521. endoreg_db/views/anonymization/__init__.py +27 -0
  522. endoreg_db/views/anonymization/media_management.py +454 -0
  523. endoreg_db/views/anonymization/overview.py +216 -0
  524. endoreg_db/views/anonymization/validate.py +63 -0
  525. endoreg_db/views/auth/__init__.py +13 -0
  526. endoreg_db/views/auth/keycloak.py +113 -0
  527. endoreg_db/views/examination/__init__.py +33 -0
  528. endoreg_db/views/examination/examination.py +37 -0
  529. endoreg_db/views/examination/examination_manifest_cache.py +26 -0
  530. endoreg_db/views/examination/get_finding_classification_choices.py +59 -0
  531. endoreg_db/views/examination/get_finding_classifications.py +36 -0
  532. endoreg_db/views/examination/get_findings.py +41 -0
  533. endoreg_db/views/examination/get_instruments.py +18 -0
  534. endoreg_db/views/examination/get_interventions.py +14 -0
  535. endoreg_db/views/finding/__init__.py +9 -0
  536. endoreg_db/views/finding/finding.py +112 -0
  537. endoreg_db/views/finding/get_classifications.py +14 -0
  538. endoreg_db/views/finding/get_interventions.py +17 -0
  539. endoreg_db/views/finding_classification/__init__.py +13 -0
  540. endoreg_db/views/finding_classification/base.py +0 -0
  541. endoreg_db/views/finding_classification/finding_classification.py +42 -0
  542. endoreg_db/views/finding_classification/get_classification_choices.py +55 -0
  543. endoreg_db/views/label/__init__.py +5 -0
  544. endoreg_db/views/label/label.py +15 -0
  545. endoreg_db/views/label_video_segment/__init__.py +16 -0
  546. endoreg_db/views/label_video_segment/create_lvs_from_annotation.py +44 -0
  547. endoreg_db/views/label_video_segment/get_lvs_by_name_and_video.py +50 -0
  548. endoreg_db/views/label_video_segment/label_video_segment.py +77 -0
  549. endoreg_db/views/label_video_segment/label_video_segment_by_label.py +174 -0
  550. endoreg_db/views/label_video_segment/label_video_segment_detail.py +73 -0
  551. endoreg_db/views/label_video_segment/update_lvs_from_annotation.py +46 -0
  552. endoreg_db/views/label_video_segment/validate.py +226 -0
  553. endoreg_db/views/media/__init__.py +9 -0
  554. endoreg_db/views/media/pdf_media.py +386 -0
  555. endoreg_db/views/media/video_media.py +272 -0
  556. endoreg_db/views/meta/__init__.py +15 -0
  557. endoreg_db/views/meta/available_files_list.py +146 -0
  558. endoreg_db/views/meta/report_meta.py +53 -0
  559. endoreg_db/views/meta/sensitive_meta_detail.py +148 -0
  560. endoreg_db/views/meta/sensitive_meta_list.py +104 -0
  561. endoreg_db/views/meta/sensitive_meta_verification.py +71 -0
  562. endoreg_db/views/misc/__init__.py +63 -0
  563. endoreg_db/views/misc/center.py +13 -0
  564. endoreg_db/views/misc/gender.py +14 -0
  565. endoreg_db/views/misc/secure_file_serving_view.py +80 -0
  566. endoreg_db/views/misc/secure_file_url_view.py +84 -0
  567. endoreg_db/views/misc/secure_url_validate.py +79 -0
  568. endoreg_db/views/misc/stats.py +220 -0
  569. endoreg_db/views/misc/translation.py +182 -0
  570. endoreg_db/views/misc/upload_views.py +240 -0
  571. endoreg_db/views/patient/__init__.py +5 -0
  572. endoreg_db/views/patient/patient.py +210 -0
  573. endoreg_db/views/patient_examination/DEPRECATED_video_backup.py +164 -0
  574. endoreg_db/views/patient_examination/__init__.py +11 -0
  575. endoreg_db/views/patient_examination/patient_examination.py +140 -0
  576. endoreg_db/views/patient_examination/patient_examination_create.py +63 -0
  577. endoreg_db/views/patient_examination/patient_examination_detail.py +66 -0
  578. endoreg_db/views/patient_examination/patient_examination_list.py +68 -0
  579. endoreg_db/views/patient_examination/video.py +194 -0
  580. endoreg_db/views/patient_finding/__init__.py +7 -0
  581. endoreg_db/views/patient_finding/base.py +0 -0
  582. endoreg_db/views/patient_finding/patient_finding.py +64 -0
  583. endoreg_db/views/patient_finding/patient_finding_optimized.py +259 -0
  584. endoreg_db/views/patient_finding_classification/__init__.py +5 -0
  585. endoreg_db/views/patient_finding_classification/pfc_create.py +67 -0
  586. endoreg_db/views/patient_finding_location/__init__.py +5 -0
  587. endoreg_db/views/patient_finding_location/pfl_create.py +70 -0
  588. endoreg_db/views/patient_finding_morphology/__init__.py +5 -0
  589. endoreg_db/views/patient_finding_morphology/pfm_create.py +70 -0
  590. endoreg_db/views/pdf/__init__.py +11 -0
  591. endoreg_db/views/pdf/pdf_media.py +239 -0
  592. endoreg_db/views/pdf/pdf_stream_views.py +127 -0
  593. endoreg_db/views/pdf/reimport.py +151 -0
  594. endoreg_db/views/report/__init__.py +9 -0
  595. endoreg_db/views/report/report_list.py +112 -0
  596. endoreg_db/views/report/report_with_secure_url.py +28 -0
  597. endoreg_db/views/report/start_examination.py +7 -0
  598. endoreg_db/views/requirement/__init__.py +10 -0
  599. endoreg_db/views/requirement/evaluate.py +279 -0
  600. endoreg_db/views/requirement/lookup.py +483 -0
  601. endoreg_db/views/requirement/lookup_store.py +252 -0
  602. endoreg_db/views/requirement_lookup/lookup.py +0 -0
  603. endoreg_db/views/requirement_lookup/lookup_store.py +0 -0
  604. endoreg_db/views/stats/__init__.py +13 -0
  605. endoreg_db/views/stats/stats_views.py +229 -0
  606. endoreg_db/views/video/__init__.py +72 -0
  607. endoreg_db/views/video/correction.py +672 -0
  608. endoreg_db/views/video/media/__init__.py +23 -0
  609. endoreg_db/views/video/media/task_status.py +49 -0
  610. endoreg_db/views/video/media/video_analyze.py +52 -0
  611. endoreg_db/views/video/media/video_apply_mask.py +48 -0
  612. endoreg_db/views/video/media/video_correction.py +21 -0
  613. endoreg_db/views/video/media/video_download_processed.py +58 -0
  614. endoreg_db/views/video/media/video_media.py +158 -0
  615. endoreg_db/views/video/media/video_meta.py +29 -0
  616. endoreg_db/views/video/media/video_processing_history.py +24 -0
  617. endoreg_db/views/video/media/video_remove_frames.py +48 -0
  618. endoreg_db/views/video/media/video_reprocess.py +40 -0
  619. endoreg_db/views/video/reimport.py +192 -0
  620. endoreg_db/views/video/segmentation.py +274 -0
  621. endoreg_db/views/{views_for_timeline.py → video/timeline.py} +3 -3
  622. endoreg_db/views/video/video_examination_viewset.py +329 -0
  623. endoreg_db/views/video/video_stream.py +188 -0
  624. endoreg_db-0.8.1.dist-info/METADATA +384 -0
  625. endoreg_db-0.8.1.dist-info/RECORD +789 -0
  626. endoreg_db/data/agl_service/data.yaml +0 -19
  627. endoreg_db/data/finding_location_classification/colonoscopy.yaml +0 -46
  628. endoreg_db/data/finding_morphology_classification/colonoscopy.yaml +0 -48
  629. endoreg_db/data/finding_morphology_classification_choice/colonoscopy_size.yaml +0 -57
  630. endoreg_db/management/commands/_load_model_template.py +0 -41
  631. endoreg_db/management/commands/delete_all.py +0 -18
  632. endoreg_db/management/commands/fetch_legacy_image_dataset.py +0 -32
  633. endoreg_db/management/commands/fix_auth_permission.py +0 -20
  634. endoreg_db/management/commands/load_active_model_data.py +0 -45
  635. endoreg_db/management/commands/load_g_play_data.py +0 -113
  636. endoreg_db/management/commands/load_logging_data.py +0 -39
  637. endoreg_db/management/commands/load_lx_data.py +0 -64
  638. endoreg_db/management/commands/load_medication_indication_data.py +0 -63
  639. endoreg_db/management/commands/load_medication_indication_type_data.py +0 -41
  640. endoreg_db/management/commands/load_medication_intake_time_data.py +0 -41
  641. endoreg_db/management/commands/load_medication_schedule_data.py +0 -55
  642. endoreg_db/management/commands/load_network_data.py +0 -57
  643. endoreg_db/migrations/0002_alter_frame_image_alter_rawframe_image.py +0 -23
  644. endoreg_db/migrations/0003_alter_frame_image_alter_rawframe_image.py +0 -23
  645. endoreg_db/migrations/0004_alter_rawvideofile_file_alter_video_file.py +0 -25
  646. endoreg_db/migrations/0005_rawvideofile_frame_count_and_more.py +0 -33
  647. endoreg_db/migrations/0006_frame_extracted_rawframe_extracted.py +0 -23
  648. endoreg_db/migrations/0007_rename_pseudo_patient_video_patient_and_more.py +0 -24
  649. endoreg_db/migrations/0008_remove_reportfile_patient_examination_and_more.py +0 -48
  650. endoreg_db/models/ai_model/active_model.py +0 -9
  651. endoreg_db/models/ai_model/ai_model.py +0 -103
  652. endoreg_db/models/ai_model/lightning/__init__.py +0 -3
  653. endoreg_db/models/ai_model/lightning/predict.py +0 -172
  654. endoreg_db/models/ai_model/lightning/prediction_visualizer.py +0 -55
  655. endoreg_db/models/ai_model/lightning/run_visualizer.py +0 -21
  656. endoreg_db/models/ai_model/model_meta.py +0 -250
  657. endoreg_db/models/annotation/__init__.py +0 -32
  658. endoreg_db/models/annotation/anonymized_image_annotation.py +0 -115
  659. endoreg_db/models/annotation/binary_classification_annotation_task.py +0 -117
  660. endoreg_db/models/annotation/image_classification.py +0 -86
  661. endoreg_db/models/annotation/video_segmentation_annotation.py +0 -52
  662. endoreg_db/models/case/__init__.py +0 -1
  663. endoreg_db/models/case/case.py +0 -34
  664. endoreg_db/models/center/center.py +0 -51
  665. endoreg_db/models/center/center_product.py +0 -33
  666. endoreg_db/models/center/center_waste.py +0 -16
  667. endoreg_db/models/data_file/__init__.py +0 -39
  668. endoreg_db/models/data_file/base_classes/__init__.py +0 -7
  669. endoreg_db/models/data_file/base_classes/abstract_frame.py +0 -100
  670. endoreg_db/models/data_file/base_classes/abstract_pdf.py +0 -136
  671. endoreg_db/models/data_file/base_classes/abstract_video.py +0 -807
  672. endoreg_db/models/data_file/base_classes/frame_helpers.py +0 -17
  673. endoreg_db/models/data_file/base_classes/prepare_bulk_frames.py +0 -19
  674. endoreg_db/models/data_file/base_classes/utils.py +0 -80
  675. endoreg_db/models/data_file/frame.py +0 -29
  676. endoreg_db/models/data_file/import_classes/__init__.py +0 -18
  677. endoreg_db/models/data_file/import_classes/processing_functions/__init__.py +0 -35
  678. endoreg_db/models/data_file/import_classes/processing_functions/pdf.py +0 -28
  679. endoreg_db/models/data_file/import_classes/processing_functions/video.py +0 -260
  680. endoreg_db/models/data_file/import_classes/raw_pdf.py +0 -260
  681. endoreg_db/models/data_file/import_classes/raw_video.py +0 -288
  682. endoreg_db/models/data_file/metadata/sensitive_meta.py +0 -290
  683. endoreg_db/models/data_file/metadata/video_meta.py +0 -199
  684. endoreg_db/models/data_file/report_file.py +0 -56
  685. endoreg_db/models/data_file/video/__init__.py +0 -11
  686. endoreg_db/models/data_file/video/import_meta.py +0 -25
  687. endoreg_db/models/data_file/video/video.py +0 -196
  688. endoreg_db/models/data_file/video_segment.py +0 -214
  689. endoreg_db/models/disease.py +0 -79
  690. endoreg_db/models/event.py +0 -73
  691. endoreg_db/models/examination/examination.py +0 -67
  692. endoreg_db/models/examination/examination_indication.py +0 -170
  693. endoreg_db/models/finding/__init__.py +0 -11
  694. endoreg_db/models/finding/finding.py +0 -75
  695. endoreg_db/models/finding/finding_location_classification.py +0 -94
  696. endoreg_db/models/finding/finding_morphology_classification.py +0 -89
  697. endoreg_db/models/finding/finding_type.py +0 -22
  698. endoreg_db/models/hardware/__init__.py +0 -2
  699. endoreg_db/models/information_source.py +0 -29
  700. endoreg_db/models/laboratory/__init__.py +0 -1
  701. endoreg_db/models/laboratory/lab_value.py +0 -111
  702. endoreg_db/models/logging/__init__.py +0 -11
  703. endoreg_db/models/logging/agl_service.py +0 -19
  704. endoreg_db/models/logging/base.py +0 -22
  705. endoreg_db/models/logging/log_type.py +0 -23
  706. endoreg_db/models/logging/network_device.py +0 -27
  707. endoreg_db/models/lx/__init__.py +0 -4
  708. endoreg_db/models/lx/client.py +0 -57
  709. endoreg_db/models/lx/identity.py +0 -34
  710. endoreg_db/models/lx/permission.py +0 -18
  711. endoreg_db/models/lx/user.py +0 -16
  712. endoreg_db/models/network/__init__.py +0 -9
  713. endoreg_db/models/network/agl_service.py +0 -38
  714. endoreg_db/models/network/network_device.py +0 -58
  715. endoreg_db/models/network/network_device_type.py +0 -23
  716. endoreg_db/models/other/distribution.py +0 -5
  717. endoreg_db/models/patient/__init__.py +0 -24
  718. endoreg_db/models/patient/patient_examination.py +0 -182
  719. endoreg_db/models/patient/patient_finding.py +0 -143
  720. endoreg_db/models/patient/patient_finding_location.py +0 -120
  721. endoreg_db/models/patient/patient_finding_morphology.py +0 -166
  722. endoreg_db/models/permissions/__init__.py +0 -44
  723. endoreg_db/models/persons/__init__.py +0 -34
  724. endoreg_db/models/persons/examiner/__init__.py +0 -2
  725. endoreg_db/models/persons/examiner/examiner.py +0 -60
  726. endoreg_db/models/persons/examiner/examiner_type.py +0 -2
  727. endoreg_db/models/persons/patient/__init__.py +0 -8
  728. endoreg_db/models/persons/patient/patient.py +0 -389
  729. endoreg_db/models/persons/patient/patient_disease.py +0 -22
  730. endoreg_db/models/persons/patient/patient_event.py +0 -52
  731. endoreg_db/models/persons/patient/patient_lab_sample.py +0 -108
  732. endoreg_db/models/persons/patient/patient_medication.py +0 -59
  733. endoreg_db/models/persons/patient/patient_medication_schedule.py +0 -88
  734. endoreg_db/models/persons/portal_user_information.py +0 -27
  735. endoreg_db/models/prediction/__init__.py +0 -8
  736. endoreg_db/models/prediction/image_classification.py +0 -51
  737. endoreg_db/models/prediction/video_prediction_meta.py +0 -306
  738. endoreg_db/models/product/product.py +0 -110
  739. endoreg_db/models/product/product_group.py +0 -27
  740. endoreg_db/models/product/product_material.py +0 -28
  741. endoreg_db/models/questionnaires/__init__.py +0 -114
  742. endoreg_db/models/quiz/__init__.py +0 -9
  743. endoreg_db/models/quiz/quiz_answer.py +0 -41
  744. endoreg_db/models/quiz/quiz_question.py +0 -54
  745. endoreg_db/models/report_reader/report_reader_config.py +0 -53
  746. endoreg_db/models/rules/__init__.py +0 -5
  747. endoreg_db/queries/get/__init__.py +0 -6
  748. endoreg_db/queries/get/center.py +0 -42
  749. endoreg_db/queries/get/model.py +0 -13
  750. endoreg_db/queries/get/patient.py +0 -14
  751. endoreg_db/queries/get/patient_examination.py +0 -20
  752. endoreg_db/queries/get/report_file.py +0 -33
  753. endoreg_db/queries/get/video.py +0 -31
  754. endoreg_db/serializers/ai_model.py +0 -19
  755. endoreg_db/serializers/annotation.py +0 -14
  756. endoreg_db/serializers/center.py +0 -11
  757. endoreg_db/serializers/examination.py +0 -33
  758. endoreg_db/serializers/frame.py +0 -9
  759. endoreg_db/serializers/hardware.py +0 -21
  760. endoreg_db/serializers/label.py +0 -22
  761. endoreg_db/serializers/patient.py +0 -33
  762. endoreg_db/serializers/prediction.py +0 -10
  763. endoreg_db/serializers/raw_pdf_anony_text_validation.py +0 -137
  764. endoreg_db/serializers/report_file.py +0 -7
  765. endoreg_db/serializers/video.py +0 -20
  766. endoreg_db/serializers/video_segmentation.py +0 -587
  767. endoreg_db/tests.py +0 -3
  768. endoreg_db/utils/legacy_ocr.py +0 -201
  769. endoreg_db/utils/video/transcode_videofile.py +0 -111
  770. endoreg_db/views/patient_views.py +0 -90
  771. endoreg_db/views/raw_pdf_anony_text_validation_views.py +0 -95
  772. endoreg_db/views/raw_pdf_meta_validation_views.py +0 -111
  773. endoreg_db/views/raw_video_meta_validation_views.py +0 -148
  774. endoreg_db/views/report_views.py +0 -96
  775. endoreg_db/views/video_segmentation_views.py +0 -166
  776. endoreg_db-0.6.3.dist-info/METADATA +0 -161
  777. endoreg_db-0.6.3.dist-info/RECORD +0 -435
  778. /endoreg_db/{case_generator/__init__.py → api/serializers/finding_descriptions.py} +0 -0
  779. /endoreg_db/{queries/get/annotation.py → api/views/finding_descriptions.py} +0 -0
  780. /endoreg_db/{queries/get/prediction.py → data/shift/m2.yaml} +0 -0
  781. /endoreg_db/{queries/get/video_import_meta.py → factories/__init__.py} +0 -0
  782. /endoreg_db/{queries/get/video_prediction_meta.py → helpers/__init__.py} +0 -0
  783. /endoreg_db/models/{case_template → administration/case/case_template}/__init__.py +0 -0
  784. /endoreg_db/models/{persons → administration/person}/person.py +0 -0
  785. /endoreg_db/models/{product → administration/product}/__init__.py +0 -0
  786. /endoreg_db/models/{report_reader → media/pdf/report_reader}/__init__.py +0 -0
  787. /endoreg_db/models/{report_reader → media/pdf/report_reader}/report_reader_flag.py +0 -0
  788. /endoreg_db/models/{hardware → medical/hardware}/endoscopy_processor.py +0 -0
  789. /endoreg_db/models/{medication → medical/medication}/__init__.py +0 -0
  790. /endoreg_db/models/{medication → medical/medication}/medication_indication.py +0 -0
  791. /endoreg_db/models/{emission → other/emission}/__init__.py +0 -0
  792. /endoreg_db/models/{rules → rule}/rule_applicator.py +0 -0
  793. /endoreg_db/{case_generator → utils/case_generator}/case_generator.py +0 -0
  794. /endoreg_db/{case_generator → utils/case_generator}/lab_sample_factory.py +0 -0
  795. /endoreg_db/{case_generator → utils/case_generator}/utils.py +0 -0
  796. /endoreg_db/views/{csrf.py → misc/csrf.py} +0 -0
  797. {endoreg_db-0.6.3.dist-info → endoreg_db-0.8.1.dist-info}/WHEEL +0 -0
  798. {endoreg_db-0.6.3.dist-info → endoreg_db-0.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,993 @@
1
+ """
2
+ PDF import service module.
3
+
4
+ Provides high-level functions for importing and anonymizing PDF files,
5
+ combining RawPdfFile creation with text extraction and anonymization.
6
+ """
7
+ from datetime import date, datetime
8
+ import logging
9
+ import shutil
10
+ import sys
11
+ import os
12
+ import hashlib
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Union
15
+ from contextlib import contextmanager
16
+ from django.conf.locale import tr
17
+ from django.db import transaction
18
+ from endoreg_db.models.media.pdf.raw_pdf import RawPdfFile
19
+ from endoreg_db.models.state.raw_pdf import RawPdfState
20
+ from endoreg_db.models import SensitiveMeta
21
+ from endoreg_db.utils.paths import PDF_DIR, STORAGE_DIR
22
+ import time
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Treat lock files older than this as stale and reclaim them (in seconds)
27
+ STALE_LOCK_SECONDS = 600
28
+
29
+ if TYPE_CHECKING:
30
+ pass # RawPdfFile already imported above
31
+
32
+
33
+ class PdfImportService:
34
+ """
35
+ Service class for importing and processing PDF files with text extraction and anonymization.
36
+ Uses a central PDF instance pattern for cleaner state management.
37
+ """
38
+
39
def __init__(self, allow_meta_overwrite: bool = False):
    """
    Set up a fresh PDF import service.

    Args:
        allow_meta_overwrite: Whether existing SensitiveMeta fields may be
            overwritten; stored unchanged on the instance.
    """
    # Overwrite policy exactly as supplied by the caller
    self.allow_meta_overwrite = allow_meta_overwrite

    # ReportReader lookup cache; None means "not resolved yet"
    self._report_reader_class = None
    self._report_reader_available = None

    # Paths this service instance has recorded as handled
    self.processed_files = set()

    # Central PDF instance plus the per-import working context
    self.processing_context = {}
    self.current_pdf = None
54
+
55
@contextmanager
def _file_lock(self, path: Path):
    """Hold an exclusive ``<path>.lock`` file for the duration of the block.

    The lock file is created atomically (``O_CREAT | O_EXCL``). If it already
    exists and is older than ``STALE_LOCK_SECONDS`` it is treated as stale,
    removed, and acquisition is retried once. The lock file is removed on
    exit only if this call actually created it, so a lock owned by another
    worker is never deleted.

    Args:
        path: File to guard; the lock file is ``str(path) + ".lock"``.

    Raises:
        ValueError: If another worker holds a fresh lock, or the retry after
            reclaiming a stale lock loses the race. The message contains
            "already being processed".
    """
    lock_path = Path(str(path) + ".lock")
    open_flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY
    acquired = False
    fd = None
    try:
        try:
            # Atomic create; fails if the lock file already exists
            fd = os.open(lock_path, open_flags, 0o644)
        except FileExistsError:
            try:
                age = time.time() - os.stat(lock_path).st_mtime
            except FileNotFoundError:
                # Holder released the lock between our open and stat
                age = None

            if age is not None and age <= STALE_LOCK_SECONDS:
                # Another worker is processing this file
                raise ValueError(f"File already being processed: {path}")

            if age is not None:
                logger.warning(
                    "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
                    path, age
                )
                try:
                    lock_path.unlink()
                except OSError as e:
                    logger.warning("Failed to remove stale lock %s: %s", lock_path, e)

            try:
                # Single retry; losing it means someone else got the lock
                fd = os.open(lock_path, open_flags, 0o644)
            except FileExistsError:
                raise ValueError(f"File already being processed: {path}") from None

        acquired = True
        os.write(fd, b"lock")
        os.close(fd)
        fd = None
        yield
    finally:
        if fd is not None:
            try:
                os.close(fd)
            except OSError:
                pass
        # Only remove a lock this call created
        if acquired:
            try:
                lock_path.unlink()
            except OSError:
                pass
103
+
104
def _sha256(self, path: Path, chunk: int = 1024 * 1024) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in binary mode, *chunk* bytes at a time, so large
    files are never loaded into memory in one piece.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # read() returns b"" at end of file, which ends the iteration
        for piece in iter(lambda: stream.read(chunk), b""):
            digest.update(piece)
    return digest.hexdigest()
114
+
115
def _quarantine(self, source: Path) -> Path:
    """Move *source* into ``PDF_DIR / "_processing"`` and return its new path.

    The folder is created on demand. The file keeps its name.
    """
    destination = PDF_DIR.joinpath("_processing", source.name)
    destination.parent.mkdir(parents=True, exist_ok=True)
    # A rename is atomic only when both paths are on the same filesystem
    source.rename(destination)
    return destination
123
+
124
def _ensure_state(self, pdf_file: "RawPdfFile"):
    """Return the state object of *pdf_file*, creating it when possible.

    Args:
        pdf_file: Object expected to expose ``state`` and/or
            ``get_or_create_state()``.

    Returns:
        The existing truthy ``pdf_file.state``; otherwise the result of
        ``pdf_file.get_or_create_state()``, which is also assigned to
        ``pdf_file.state``; or None when the object offers neither.
    """
    current = getattr(pdf_file, "state", None)
    if current:
        return current

    if not hasattr(pdf_file, "get_or_create_state"):
        # Nothing to create a state with. The previous fallback called the
        # same missing method, so it could only fail and return None.
        return None

    state = pdf_file.get_or_create_state()
    pdf_file.state = state
    return state
139
+
140
def _ensure_report_reading_available(self):
    """
    Resolve the ``lx_anonymizer.ReportReader`` class, caching the outcome.

    A direct import is tried first. If that fails and the environment
    variable ``LX_ANONYMIZER_PATH`` names an existing directory that is not
    yet on ``sys.path``, the directory is prepended and the import retried.
    The directory stays on ``sys.path`` only if the retry succeeds; a
    ``sys.path`` entry this call did not add is never removed.

    Returns:
        Tuple of (availability_flag, ReportReader_class). The class is None
        when the module could not be imported.
    """
    if self._report_reader_available is not None:
        return self._report_reader_available, self._report_reader_class

    reader_cls = None
    try:
        # Try direct import first
        from lx_anonymizer import ReportReader as reader_cls

        logger.info("Successfully imported lx_anonymizer ReportReader module")
    except ImportError as direct_error:
        # Optional: honor LX_ANONYMIZER_PATH=/abs/path/to/src
        extra = os.getenv("LX_ANONYMIZER_PATH")
        if extra and extra not in sys.path and Path(extra).exists():
            import importlib

            sys.path.insert(0, extra)
            try:
                reader_cls = getattr(importlib.import_module("lx_anonymizer"), "ReportReader")
                logger.info("Imported lx_anonymizer.ReportReader via LX_ANONYMIZER_PATH")
            except Exception as e:
                logger.warning("Failed importing lx_anonymizer via LX_ANONYMIZER_PATH: %s", e)
                # Undo only the entry inserted above
                if extra in sys.path:
                    sys.path.remove(extra)
        else:
            logger.warning(
                "lx_anonymizer is not importable and LX_ANONYMIZER_PATH offers no usable fallback: %s",
                direct_error,
            )

    self._report_reader_available = reader_cls is not None
    self._report_reader_class = reader_cls
    return self._report_reader_available, self._report_reader_class
182
+
183
+
184
def _ensure_default_patient_data(self, pdf_instance: "RawPdfFile" = None) -> None:
    """
    Attach placeholder SensitiveMeta to a PDF that has none.

    Works on *pdf_instance* when given, otherwise on ``self.current_pdf``.
    Nothing happens if the PDF already has a truthy ``sensitive_meta``.
    Failures while creating or saving the placeholder are logged, not raised.

    Args:
        pdf_instance: Optional specific PDF instance, defaults to self.current_pdf
    """
    target = pdf_instance or self.current_pdf
    if not target:
        logger.warning("No PDF instance available for ensuring default patient data")
        return

    if target.sensitive_meta:
        return

    logger.info(f"No SensitiveMeta found for PDF {target.pdf_hash}, creating default")

    # Placeholder values used when nothing was extracted from the document
    placeholder = dict(
        patient_first_name="Patient",
        patient_last_name="Unknown",
        patient_dob=date(1990, 1, 1),  # Default DOB
        examination_date=date.today(),
        center_name=target.center.name if target.center else "university_hospital_wuerzburg",
    )

    try:
        target.sensitive_meta = SensitiveMeta.create_from_dict(placeholder)
        target.save(update_fields=['sensitive_meta'])
        logger.info(f"Created default SensitiveMeta for PDF {target.pdf_hash}")
    except Exception as e:
        logger.error(f"Failed to create default SensitiveMeta for PDF {target.pdf_hash}: {e}")
217
+
218
def import_and_anonymize(
    self,
    file_path: Union[Path, str],
    center_name: str,
    delete_source: bool = False,
    retry: bool = False,
) -> Union["RawPdfFile", None]:
    """
    Import a PDF file and run the processing steps on it.

    Runs, in order: context initialisation, file validation, PDF instance
    creation/retrieval, environment setup, text and metadata processing,
    and finalisation. The processing context is always cleaned up at the end.

    Args:
        file_path: Path to the PDF file to import
        center_name: Name of the center to associate with PDF
        delete_source: Whether to delete the source file after import
        retry: Whether this is a retry attempt

    Returns:
        The ``self.current_pdf`` instance after processing, or None when no
        PDF instance was produced or the file was skipped because it is
        already being processed.

    Raises:
        Exception: Any failure other than the "already being processed"
            skip is logged, ``_cleanup_on_error()`` is run, and the
            original exception is re-raised.
    """
    try:
        # Initialize processing context
        self._initialize_processing_context(file_path, center_name, delete_source, retry)

        # Step 1: Validate and prepare file
        self._validate_and_prepare_file()

        # Step 2: Create or retrieve PDF instance
        self._create_or_retrieve_pdf_instance()

        # Without a PDF instance there is nothing left to process
        if not self.current_pdf:
            logger.warning(f"No PDF instance created for {file_path}, returning None")
            return None

        # Step 3: Setup processing environment
        self._setup_processing_environment()

        # Step 4: Process text and metadata
        self._process_text_and_metadata()

        # Step 5: Finalize processing
        self._finalize_processing()

        return self.current_pdf

    except Exception as e:
        # A concurrent or duplicate import is a skip, not an error
        if isinstance(e, ValueError) and "already being processed" in str(e):
            logger.info(f"Skipping file {file_path}: {e}")
            return None

        logger.error(f"PDF import failed for {file_path}: {e}")
        self._cleanup_on_error()
        raise
    finally:
        # Always cleanup context
        self._cleanup_processing_context()
284
+
285
def _initialize_processing_context(self, file_path: Union[Path, str], center_name: str,
                                   delete_source: bool, retry: bool):
    """Reset ``self.processing_context`` for a new import.

    The context is replaced first; afterwards a ValueError is raised if
    ``str(file_path)`` is already recorded in ``self.processed_files``.
    """
    # All progress flags start out unset
    progress_flags = dict.fromkeys(
        (
            'processing_started',
            'text_extracted',
            'metadata_processed',
            'anonymization_completed',
        ),
        False,
    )
    self.processing_context = {
        'file_path': Path(file_path),
        'center_name': center_name,
        'delete_source': delete_source,
        'retry': retry,
        'file_hash': None,
        **progress_flags,
    }

    # Duplicate guard for paths already recorded by this service instance
    seen_before = str(file_path) in self.processed_files
    if seen_before:
        logger.info(f"File {file_path} already being processed in current session, skipping")
        raise ValueError("File already being processed")

    logger.info(f"Starting import and processing for: {file_path}")
306
+
307
def _validate_and_prepare_file(self):
    """Check that the context's file exists and store its SHA-256 hash.

    Raises:
        FileNotFoundError: If ``processing_context['file_path']`` does not exist.

    A failure while hashing is logged and ``file_hash`` is set to None.
    """
    pdf_path = self.processing_context['file_path']
    if not pdf_path.exists():
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")

    digest = None
    try:
        digest = self._sha256(pdf_path)
    except Exception as e:
        logger.warning(f"Could not calculate file hash: {e}")

    self.processing_context['file_hash'] = digest
319
+
320
def _create_or_retrieve_pdf_instance(self):
    """Set ``self.current_pdf`` to a new or already stored RawPdfFile.

    Not a retry: under the file lock, look up a RawPdfFile by the file hash.
    If one exists and already has text, it becomes ``self.current_pdf``.
    If it has no text, processing is delegated to
    ``self._retry_existing_pdf(existing)``. With no match, a new instance is
    created via ``RawPdfFile.create_from_file_initialized``.

    Retry: the existing RawPdfFile is fetched by hash and used as is.

    An IntegrityError during creation is treated as a race with another
    worker and resolved by fetching the row by hash; it is re-raised when
    no hash is available.

    Raises:
        ValueError: From ``_file_lock`` when the file is locked elsewhere.
        RuntimeError: If no instance could be created.
    """
    from django.db import IntegrityError

    file_path = self.processing_context['file_path']
    center_name = self.processing_context['center_name']
    delete_source = self.processing_context['delete_source']
    retry = self.processing_context['retry']
    file_hash = self.processing_context['file_hash']

    if not retry:
        # Check for existing PDF and handle duplicates
        with self._file_lock(file_path):
            # One query instead of exists() + get(): no window in which the
            # row can vanish between the check and the fetch
            existing = (
                RawPdfFile.objects.filter(pdf_hash=file_hash).first()
                if file_hash
                else None
            )

            if existing:
                logger.info(f"Found existing RawPdfFile {existing.pdf_hash}")
                if existing.text:
                    logger.info(f"Existing PDF {existing.pdf_hash} already processed - returning")
                    self.current_pdf = existing
                    return
                # Stored but never processed: hand over to the retry path
                logger.info(f"Reprocessing existing PDF {existing.pdf_hash}")
                return self._retry_existing_pdf(existing)

    # Create new PDF instance
    logger.info("Creating new RawPdfFile instance...")

    try:
        if not retry:
            self.current_pdf = RawPdfFile.create_from_file_initialized(
                file_path=file_path,
                center_name=center_name,
                delete_source=delete_source,
            )
        else:
            # Retrieve existing for retry
            # NOTE(review): file_hash is None if hashing failed earlier;
            # this lookup then presumably raises DoesNotExist - confirm.
            self.current_pdf = RawPdfFile.objects.get(pdf_hash=file_hash)
            logger.info(f"Retrying import for existing RawPdfFile {self.current_pdf.pdf_hash}")

            # Check if retry is actually needed
            if self.current_pdf.text:
                logger.info(f"Existing PDF {self.current_pdf.pdf_hash} already processed during retry - returning")
                return

        if not self.current_pdf:
            raise RuntimeError("Failed to create RawPdfFile instance")

        logger.info(f"PDF instance ready: {self.current_pdf.pdf_hash}")

    except IntegrityError:
        # Race condition - another worker created it
        if file_hash:
            self.current_pdf = RawPdfFile.objects.get(pdf_hash=file_hash)
            logger.info("Race condition detected, using existing RawPdfFile")
        else:
            raise
379
+
380
def _setup_processing_environment(self):
    """
    Prepare the current PDF and the processing context for text processing.

    In order: calls ``create_sensitive_file`` for the current PDF, repoints
    ``processing_context['file_path']`` at ``current_pdf.file.path``, marks
    the PDF state as started, records the path in ``self.processed_files``
    and calls ``_ensure_default_patient_data``.
    """
    pdf = self.current_pdf
    context = self.processing_context

    # Create the sensitive file copy, then work with the stored file's path
    self.create_sensitive_file(pdf, context['file_path'])
    stored_path = pdf.file.path
    context['file_path'] = stored_path

    # Ensure a state object exists and flag the run as started
    pdf.get_or_create_state().mark_processing_started()
    context['processing_started'] = True

    # Track the path so the same file is not picked up twice
    self.processed_files.add(str(stored_path))

    logger.info("Ensuring default patient data...")
    self._ensure_default_patient_data(pdf)
399
+
400
def _process_text_and_metadata(self):
    """
    Run ReportReader on the current PDF and apply whatever it produced.

    When the reader is unavailable or the PDF has no file, processing is
    marked incomplete ("no_report_reader" / "no_file") and the method
    returns. Otherwise the reader's five results are stored in
    ``self.processing_context`` and, for each non-empty result, the matching
    ``_apply_*`` step runs and its context flag is set to True. Any
    exception is logged and recorded as "text_processing_failed".
    """
    reader_available, reader_cls = self._ensure_report_reading_available()

    if not reader_available:
        logger.warning("Report reading not available (lx_anonymizer not found)")
        self._mark_processing_incomplete("no_report_reader")
        return

    if not self.current_pdf.file:
        logger.warning("No file available for text processing")
        self._mark_processing_incomplete("no_file")
        return

    try:
        logger.info("Starting text extraction and metadata processing with ReportReader...")

        # Output directories for the reader's crops and anonymized PDFs
        crops_dir = PDF_DIR / 'cropped_regions'
        anonymized_dir = PDF_DIR / 'anonymized'
        for out_dir in (crops_dir, anonymized_dir):
            out_dir.mkdir(parents=True, exist_ok=True)

        reader = reader_cls(
            report_root_path=STORAGE_DIR,
            locale="de_DE",
            text_date_format="%d.%m.%Y"
        )

        # Process with cropping
        (
            original_text,
            anonymized_text,
            extracted_metadata,
            cropped_regions,
            anonymized_pdf_path,
        ) = reader.process_report_with_cropping(
            pdf_path=self.processing_context['file_path'],
            crop_sensitive_regions=True,
            crop_output_dir=str(crops_dir),
            anonymization_output_dir=str(anonymized_dir)
        )

        # Keep the raw results available to the apply steps below
        self.processing_context.update({
            'original_text': original_text,
            'anonymized_text': anonymized_text,
            'extracted_metadata': extracted_metadata,
            'cropped_regions': cropped_regions,
            'anonymized_pdf_path': anonymized_pdf_path
        })

        # (result, step that applies it, context flag set once applied)
        apply_steps = (
            (original_text, self._apply_text_results, 'text_extracted'),
            (extracted_metadata, self._apply_metadata_results, 'metadata_processed'),
            (anonymized_pdf_path, self._apply_anonymized_pdf, 'anonymization_completed'),
        )
        for produced, apply_step, done_flag in apply_steps:
            if produced:
                apply_step()
                self.processing_context[done_flag] = True

    except Exception as e:
        logger.warning(f"Text processing failed: {e}")
        self._mark_processing_incomplete("text_processing_failed")
462
+
463
def _apply_text_results(self):
    """
    Copy the extracted text from the processing context onto the PDF.

    Sets ``current_pdf.text`` to ``original_text`` and sets
    ``current_pdf.anonymized`` to True when a non-empty ``anonymized_text``
    differs from it. Does nothing (beyond logging) without a PDF instance
    or without original text.
    """
    pdf = self.current_pdf
    if not pdf:
        logger.warning("Cannot apply text results - no PDF instance available")
        return

    context = self.processing_context
    original_text = context.get('original_text')
    anonymized_text = context.get('anonymized_text')

    if not original_text:
        logger.warning("No original text available to apply")
        return

    # Store extracted text
    pdf.text = original_text
    logger.info(f"Extracted {len(original_text)} characters of text from PDF")

    # Only a text that actually changed counts as anonymized here
    text_changed = bool(anonymized_text) and anonymized_text != original_text
    if text_changed:
        pdf.anonymized = True
        logger.info("PDF text anonymization completed")
484
+
485
def _apply_metadata_results(self):
    """
    Copy extracted metadata onto the PDF's SensitiveMeta record.

    For every mapped key with a truthy extracted value, the corresponding
    SensitiveMeta attribute is overwritten when ``self.allow_meta_overwrite``
    is set, or when the current value is empty or one of the placeholders
    'Patient' / 'Unknown'. Date fields go through ``_parse_date_field`` and
    are skipped when that returns None. The record is saved once if any
    field changed.
    """
    if not self.current_pdf:
        logger.warning("Cannot apply metadata results - no PDF instance available")
        return

    extracted = self.processing_context.get('extracted_metadata')
    sm = self.current_pdf.sensitive_meta
    if not sm or not extracted:
        logger.debug("No sensitive meta or extracted metadata available")
        return

    # (ReportReader metadata key, SensitiveMeta attribute)
    field_pairs = (
        ('patient_first_name', 'patient_first_name'),
        ('patient_last_name', 'patient_last_name'),
        ('patient_dob', 'patient_dob'),
        ('examination_date', 'examination_date'),
        ('examiner_first_name', 'examiner_first_name'),
        ('examiner_last_name', 'examiner_last_name'),
        ('endoscope_type', 'endoscope_type'),
        ('casenumber', 'case_number'),
    )
    date_fields = ('patient_dob', 'examination_date')
    placeholder_values = ('Patient', 'Unknown')

    changed = []
    for meta_key, sm_field in field_pairs:
        raw_value = extracted.get(meta_key)
        if not raw_value or not hasattr(sm, sm_field):
            continue

        current_value = getattr(sm, sm_field)

        # The field name itself as a value means no real data was extracted
        if isinstance(raw_value, str) and raw_value == meta_key:
            continue

        if sm_field in date_fields:
            candidate = self._parse_date_field(raw_value, meta_key, sm_field)
            if candidate is None:
                continue
        else:
            candidate = raw_value

        # Configurable overwrite policy
        may_overwrite = (
            self.allow_meta_overwrite
            or not current_value
            or current_value in placeholder_values
        )
        if candidate and may_overwrite:
            setattr(sm, sm_field, candidate)
            changed.append(sm_field)

    if changed:
        sm.save()
        logger.info(f"Updated SensitiveMeta fields: {changed}")
543
+
544
def _parse_date_field(self, raw_value, meta_key, sm_field):
    """
    Convert an extracted date value into a ``date`` where possible.

    Args:
        raw_value: Extracted value; a string, an object exposing a callable
            ``date()`` (such as ``datetime``), or anything else.
        meta_key: Metadata key the value came from. A string equal to this
            key is treated as "no data".
        sm_field: Target field name, used only in log messages.

    Returns:
        The parsed ``date`` for strings in one of the supported formats
        (surrounding whitespace is ignored), ``raw_value.date()`` for
        objects with a callable ``date`` attribute, ``raw_value`` unchanged
        for any other non-string, and ``None`` when parsing fails.
    """
    try:
        if isinstance(raw_value, str):
            # Extracted text often carries stray whitespace or newlines,
            # which strptime rejects, so parse the stripped value.
            text = raw_value.strip()

            # Skip if the value is just the field name itself
            if text == meta_key:
                logger.warning(
                    "Skipping date field %s - got field name '%s' instead of actual date",
                    sm_field, raw_value
                )
                return None

            # Formats are tried in order; the first match wins, so an
            # ambiguous 'xx/yy/YYYY' is read as day/month before month/day.
            for fmt in ('%Y-%m-%d', '%d.%m.%Y', '%d/%m/%Y', '%m/%d/%Y'):
                try:
                    return datetime.strptime(text, fmt).date()
                except ValueError:
                    continue

            logger.warning("Could not parse date '%s' for field %s", raw_value, sm_field)
            return None

        # Only call .date when it is a method; a plain attribute named
        # 'date' would otherwise raise an uncaught TypeError.
        to_date = getattr(raw_value, 'date', None)
        if callable(to_date):
            return to_date()
        return raw_value

    except (ValueError, AttributeError) as e:
        logger.warning("Date parsing failed for %s: %s", sm_field, e)
        return None
575
+
576
+ # from gc-08
577
def _apply_anonymized_pdf(self):
    """
    Point ``anonymized_file`` at the PDF the anonymizer already wrote.

    No file bytes are re-saved through Django storage (per the original
    note, that would place a new file under upload_to='raw_pdfs' and
    retrigger the watcher). Only the FileField name is set, relative to
    STORAGE_DIR when the file lives below it and absolute otherwise. The
    model and its state are also flagged as anonymized, even if the text
    itself did not change. Failures are logged, not raised.
    """
    pdf = self.current_pdf
    if not pdf:
        logger.warning("Cannot apply anonymized PDF - no PDF instance available")
        return

    reported_path = self.processing_context.get('anonymized_pdf_path')
    if not reported_path:
        logger.debug("No anonymized_pdf_path present in processing context")
        return

    anonymized_path = Path(reported_path)
    if not anonymized_path.exists():
        logger.warning("Anonymized PDF path returned but file does not exist: %s", anonymized_path)
        return

    logger.info("Anonymized PDF created by ReportReader at: %s", anonymized_path)

    try:
        # Prefer a name relative to STORAGE_DIR; fall back to the absolute
        # path if the file lives outside of it.
        try:
            stored_name = str(anonymized_path.relative_to(STORAGE_DIR))
        except ValueError:
            stored_name = str(anonymized_path)

        # Only touch the field when the reference actually changes
        file_field = pdf.anonymized_file
        if getattr(file_field, 'name', None) != stored_name:
            file_field.name = stored_name

        # Reflect anonymization on the model even if the text did not differ
        if not getattr(pdf, "anonymized", False):
            pdf.anonymized = True

        # Cropped regions are only logged here, not persisted
        regions = self.processing_context.get('cropped_regions')
        if regions:
            logger.debug("Cropped regions recorded (%d regions).", len(regions))

        # Save model changes
        fields_to_save = ['anonymized_file']
        if 'anonymized' in pdf.__dict__:
            fields_to_save.append('anonymized')
        pdf.save(update_fields=fields_to_save)

        # Mark the state as anonymized right away for downstream flows
        state = self._ensure_state(pdf)
        if state and not state.anonymized:
            state.mark_anonymized(save=True)

        logger.info("Updated anonymized_file reference to: %s", pdf.anonymized_file.name)

    except Exception as e:
        logger.warning("Could not set anonymized file reference: %s", e)
640
+
641
+ '''def _apply_anonymized_pdf(self):
642
+ """Apply anonymized PDF results."""
643
+ if not self.current_pdf:
644
+ logger.warning("Cannot apply anonymized PDF - no PDF instance available")
645
+ return
646
+
647
+ anonymized_pdf_path = self.processing_context.get('anonymized_pdf_path')
648
+
649
+ if not anonymized_pdf_path:
650
+ return
651
+
652
+ anonymized_path = Path(anonymized_pdf_path)
653
+ if anonymized_path.exists():
654
+ logger.info(f"Anonymized PDF created by ReportReader at: {anonymized_path}")
655
+ try:
656
+ from django.core.files.base import File
657
+ with open(anonymized_path, 'rb') as f:
658
+ django_file = File(f)
659
+ self.current_pdf.anonymized_file.save(
660
+ anonymized_path.name,
661
+ django_file,
662
+ save=False
663
+ )
664
+ except Exception as e:
665
+ logger.warning(f"Could not set anonymized file reference: {e}")
666
+ else:
667
+ logger.warning(f"Anonymized PDF path returned but file does not exist: {anonymized_path}")'''
668
+
669
+
670
+
671
+
672
def _finalize_processing(self):
    """
    Persist the current PDF and its state at the end of a processing run.

    When the context reports ``text_extracted`` and a state object exists,
    ``state.mark_anonymized()`` is called first. The PDF and then the state
    are saved inside one ``transaction.atomic()`` block. Errors are logged
    and swallowed.
    """
    pdf = self.current_pdf
    if not pdf:
        logger.warning("Cannot finalize processing - no PDF instance available")
        return

    try:
        state = self._ensure_state(pdf)

        # Update state based on processing results
        text_was_extracted = self.processing_context.get('text_extracted')
        if text_was_extracted and state:
            state.mark_anonymized()

        # Save all changes together
        with transaction.atomic():
            pdf.save()
            if state:
                state.save()

        logger.info("PDF processing completed successfully")
    except Exception as e:
        logger.warning(f"Failed to finalize processing: {e}")
694
+
695
def _mark_processing_incomplete(self, reason: str):
    """
    Record on the PDF state that processing did not complete.

    Args:
        reason: Short tag explaining why; it only appears in log output.

    Sets ``text_meta_extracted``, ``pdf_meta_extracted`` and
    ``sensitive_meta_processed`` to False on the state (when one exists),
    saves it, then saves the PDF inside ``transaction.atomic()``. Errors
    are logged and swallowed.
    """
    pdf = self.current_pdf
    if not pdf:
        logger.warning(f"Cannot mark processing incomplete - no PDF instance available. Reason: {reason}")
        return

    try:
        state = self._ensure_state(pdf)
        if state:
            for flag in ('text_meta_extracted', 'pdf_meta_extracted', 'sensitive_meta_processed'):
                setattr(state, flag, False)
            state.save()
            logger.info(f"Set PDF state: processed=False due to {reason}")

        # Save changes
        with transaction.atomic():
            pdf.save()
    except Exception as e:
        logger.warning(f"Failed to mark processing incomplete: {e}")
715
+
716
def _retry_existing_pdf(self, existing_pdf):
    """
    Re-run ``import_and_anonymize`` for an already stored PDF.

    The stored file's path is first dropped from ``self.processed_files`` so
    the retry is not rejected as a duplicate. The import is then invoked
    with ``retry=True`` and ``delete_source=False``.

    Returns:
        The result of ``import_and_anonymize``; if anything raises,
        ``existing_pdf`` itself, which is then also set as
        ``self.current_pdf``.
    """
    try:
        # Remove from processed files to allow retry
        tracked_key = str(existing_pdf.file.path) if existing_pdf.file else None
        if tracked_key and tracked_key in self.processed_files:
            self.processed_files.remove(tracked_key)
            logger.debug(f"Removed {tracked_key} from processed files for retry")

        retry_path = existing_pdf.file.path
        if existing_pdf.center:
            retry_center = existing_pdf.center.name
        else:
            retry_center = "unknown_center"

        return self.import_and_anonymize(
            file_path=retry_path,
            center_name=retry_center,
            delete_source=False,
            retry=True
        )
    except Exception as e:
        logger.error(f"Failed to re-import existing PDF {existing_pdf.pdf_hash}: {e}")
        self.current_pdf = existing_pdf
        return existing_pdf
735
+
736
def _cleanup_on_error(self):
    """
    Reset state flags after a failed run and release the tracked file path.

    If a PDF with a ``state`` attribute exists and processing had started,
    the three extraction/processing flags on the state are set to False and
    saved. Regardless of errors, the context's ``file_path`` is removed from
    ``self.processed_files`` so the file is not blocked for later attempts.
    """
    try:
        pdf = self.current_pdf
        if pdf and hasattr(pdf, 'state'):
            state = self._ensure_state(pdf)
            if state and self.processing_context.get('processing_started'):
                for flag in ('text_meta_extracted', 'pdf_meta_extracted', 'sensitive_meta_processed'):
                    setattr(state, flag, False)
                state.save()
                logger.debug("Updated PDF state to indicate processing failure")
    except Exception as e:
        logger.warning(f"Error during cleanup: {e}")
    finally:
        # Always clean up processed files set to prevent blocks
        tracked = self.processing_context.get('file_path')
        if tracked and str(tracked) in self.processed_files:
            self.processed_files.remove(str(tracked))
            logger.debug(f"Removed {tracked} from processed files during error cleanup")
755
+
756
def _cleanup_processing_context(self):
    """
    Tidy up after a processing attempt and reset per-run state.

    Removes the 'cropped_regions' directory when text was extracted and the
    directory is empty, drops the context's ``file_path`` from
    ``self.processed_files``, and finally always clears
    ``self.current_pdf`` and ``self.processing_context``.
    """
    try:
        # Clean up temporary directories
        if self.processing_context.get('text_extracted'):
            crops_dir = PDF_DIR / 'cropped_regions'
            if crops_dir.exists():
                has_entries = any(crops_dir.iterdir())
                if not has_entries:
                    crops_dir.rmdir()

        # Always remove from processed files set after processing attempt
        tracked = self.processing_context.get('file_path')
        if tracked and str(tracked) in self.processed_files:
            self.processed_files.remove(str(tracked))
            logger.debug(f"Removed {tracked} from processed files set")

    except Exception as e:
        logger.warning(f"Error during context cleanup: {e}")
    finally:
        # Reset context
        self.current_pdf = None
        self.processing_context = {}
777
+
778
def import_simple(
    self,
    file_path: Union[Path, str],
    center_name: str,
    delete_source: bool = False
) -> "RawPdfFile":
    """
    Import a PDF without text processing or anonymization.

    Args:
        file_path: Path to the PDF file to import.
        center_name: Name of the center to associate with the PDF.
        delete_source: Whether to delete the source file after import.

    Returns:
        The RawPdfFile instance created by the import.

    Raises:
        Exception: Any error from the import steps is logged, followed by
            ``_cleanup_on_error``, and re-raised. The processing context is
            reset in every case.
    """
    try:
        # Initialize a non-retry context and validate the file
        self._initialize_processing_context(file_path, center_name, delete_source, False)
        self._validate_and_prepare_file()

        logger.info("Starting simple import - creating RawPdfFile instance...")
        created = RawPdfFile.create_from_file_initialized(
            file_path=self.processing_context['file_path'],
            center_name=center_name,
            delete_source=delete_source,
        )
        self.current_pdf = created

        if not created:
            raise RuntimeError("Failed to create RawPdfFile instance")

        # Mark as processed
        self.processed_files.add(str(self.processing_context['file_path']))

        # A simple import leaves every extraction/processing flag unset
        state = self._ensure_state(created)
        if state:
            for flag in ('text_meta_extracted', 'pdf_meta_extracted', 'sensitive_meta_processed'):
                setattr(state, flag, False)
            state.save()
            logger.info("Set PDF state: processed=False for simple import")

        # Save changes
        with transaction.atomic():
            created.save()

        logger.info("Simple import completed for RawPdfFile hash: %s", created.pdf_hash)
        return created

    except Exception as e:
        logger.error(f"Simple PDF import failed for {file_path}: {e}")
        self._cleanup_on_error()
        raise
    finally:
        self._cleanup_processing_context()
839
+
840
def check_storage_capacity(self, file_path: Union[Path, str], storage_root, min_required_space) -> bool:
    """
    Check that ``storage_root`` has enough free space for the PDF file.

    Args:
        file_path: Path to the PDF file to check.
        storage_root: Directory whose filesystem is queried with
            ``shutil.disk_usage``.
        min_required_space: Minimum number of free bytes to insist on.
            Non-numeric values (e.g. ``None``) are ignored, in which case
            only the file size has to fit.

    Returns:
        True when the check passes.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        InsufficientStorageError: If free space is below the larger of the
            file size and ``min_required_space``.
    """
    import shutil

    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"File not found for storage check: {file_path}")

    file_size = file_path.stat().st_size
    free = shutil.disk_usage(storage_root).free

    # Honour the caller's floor: previously the argument was overwritten and
    # never entered the comparison, so only the file size was checked.
    is_number = isinstance(min_required_space, (int, float)) and not isinstance(min_required_space, bool)
    required = max(file_size, min_required_space) if is_number else file_size

    if required > free:
        # Imported here because it is only needed on the failure path
        from endoreg_db.exceptions import InsufficientStorageError
        raise InsufficientStorageError(f"Not enough space to store PDF file: {file_path}")

    logger.info(f"Storage check passed for {file_path}: {file_size} bytes, {free} bytes available")
    return True
873
+
874
def create_sensitive_file(self, pdf_instance: "RawPdfFile" = None, file_path: Union[Path, str] = None) -> None:
    """
    Move the PDF into the sensitive directory and repoint its FileField.

    Falls back to ``self.current_pdf`` and
    ``self.processing_context['file_path']`` when the arguments are omitted.
    The file ends up at ``PDF_DIR / "sensitive" / "<pdf_hash>.pdf"`` and
    ``pdf_file.file.name`` is set relative to STORAGE_DIR (absolute if the
    target is not below it). Problems while moving or updating are logged,
    not raised.

    Args:
        pdf_instance: PDF record to update; defaults to ``self.current_pdf``.
        file_path: Source file; defaults to the context's ``file_path``.

    Raises:
        ValueError: If no PDF instance or no source path is available.
    """
    pdf_file = pdf_instance or self.current_pdf
    source_path = file_path if file_path else self.processing_context.get('file_path')

    if not pdf_file:
        raise ValueError("No PDF instance available for creating sensitive file")
    if not source_path:
        raise ValueError("No file path available for creating sensitive file")

    # The context may hold a plain string; normalise before using Path APIs
    source_path = Path(source_path)

    SENSITIVE_DIR = PDF_DIR / "sensitive"
    target = SENSITIVE_DIR / f"{pdf_file.pdf_hash}.pdf"

    try:
        os.makedirs(SENSITIVE_DIR, exist_ok=True)

        # Compare resolved paths so a relative or symlinked alias of the
        # target is recognised as the same file.
        already_in_place = source_path.resolve() == target.resolve()

        if not already_in_place:
            if source_path.exists():
                # Replace semantics when the target exists (re-import).
                # The old copy is removed only now that a source is known
                # to exist, so a stale path can no longer destroy it.
                if target.exists():
                    try:
                        target.unlink()
                    except Exception as e:
                        logger.warning("Could not remove existing sensitive target %s: %s", target, e)
                shutil.move(str(source_path), str(target))
                logger.info(f"Moved PDF to sensitive directory: {target}")
            elif target.exists():
                logger.warning(
                    "Source PDF %s is missing; keeping existing sensitive file %s",
                    source_path, target
                )
            else:
                raise FileNotFoundError(f"Source PDF not found: {source_path}")

        # Point the FileField at the file without re-saving its content:
        # the file is already at target, so only .name is set.
        try:
            relative_name = str(target.relative_to(STORAGE_DIR))
        except ValueError:
            # Fallback: target is not under STORAGE_DIR, store absolute path (not ideal)
            relative_name = str(target)

        # Only update when changed
        if getattr(pdf_file.file, 'name', None) != relative_name:
            pdf_file.file.name = relative_name
            pdf_file.save(update_fields=['file'])
            logger.info("Updated PDF FileField reference to sensitive path: %s", pdf_file.file.path)
        else:
            logger.debug("PDF FileField already points to sensitive path: %s", pdf_file.file.path)

        # Best-effort: remove a leftover original to avoid re-triggers, but
        # never when it is the sensitive file itself under another name.
        try:
            if not already_in_place and source_path.exists():
                os.remove(source_path)
                logger.info(f"Removed original PDF file at ingress: {source_path}")
        except OSError as e:
            logger.warning(f"Could not delete original PDF file {source_path}: {e}")

    except Exception as e:
        logger.warning(f"Could not create sensitive file copy for {pdf_file.pdf_hash}: {e}", exc_info=True)
936
+
937
def archive_or_quarantine_file(self, pdf_instance: "RawPdfFile" = None, source_file_path: Union[Path, str] = None,
                               quarantine_reason: str = None, is_pdf_problematic: bool = None) -> bool:
    """
    Move the file to the quarantine or the processed directory.

    Missing arguments fall back to ``self.current_pdf`` and to the
    ``file_path`` / ``error_reason`` entries of ``self.processing_context``.

    Args:
        pdf_instance: PDF record; defaults to ``self.current_pdf``.
        source_file_path: File to move; defaults to the context's ``file_path``.
        quarantine_reason: Reason stored on the record when quarantining.
        is_pdf_problematic: Overrides ``pdf_file.is_problematic`` when not None.

    Returns:
        True on the quarantine path (even if the move failed, so the file is
        not processed further), False on the archive path.

    Raises:
        ValueError: If no PDF instance or no file path is available.
    """
    pdf_file = pdf_instance or self.current_pdf
    if source_file_path:
        file_path = Path(source_file_path)
    else:
        file_path = self.processing_context.get('file_path')
    quarantine_reason = quarantine_reason or self.processing_context.get('error_reason')

    if not pdf_file:
        raise ValueError("No PDF instance available for archiving/quarantine")
    if not file_path:
        raise ValueError("No file path available for archiving/quarantine")

    # An explicit override wins over the record's own flag
    if is_pdf_problematic is None:
        pdf_problematic = pdf_file.is_problematic
    else:
        pdf_problematic = is_pdf_problematic

    if not pdf_problematic:
        # Archive the file normally
        logger.info(f"Archiving successfully processed PDF: {pdf_file.pdf_hash}")
        archive_dir = PDF_DIR / "processed"
        os.makedirs(archive_dir, exist_ok=True)

        archive_path = archive_dir / f"{pdf_file.pdf_hash}.pdf"
        try:
            shutil.move(file_path, archive_path)
            logger.info(f"Moved processed PDF to archive: {archive_path}")
        except Exception as e:
            logger.error(f"Failed to archive PDF {pdf_file.pdf_hash}: {e}")
        return False

    # Quarantine the file
    logger.warning(f"Quarantining problematic PDF: {pdf_file.pdf_hash}, reason: {quarantine_reason}")
    quarantine_dir = PDF_DIR / "quarantine"
    os.makedirs(quarantine_dir, exist_ok=True)

    quarantine_path = quarantine_dir / f"{pdf_file.pdf_hash}.pdf"
    try:
        shutil.move(file_path, quarantine_path)
        pdf_file.quarantine_reason = quarantine_reason or "File processing failed"
        pdf_file.save(update_fields=['quarantine_reason'])
        logger.info(f"Moved problematic PDF to quarantine: {quarantine_path}")
    except Exception as e:
        logger.error(f"Failed to quarantine PDF {pdf_file.pdf_hash}: {e}")
    # Reported as quarantined either way to prevent further processing
    return True