endoreg-db 0.4.5__py3-none-any.whl → 0.8.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (846)
  1. endoreg_db/admin.py +90 -1
  2. endoreg_db/api_urls.py +4 -0
  3. endoreg_db/apps.py +12 -0
  4. endoreg_db/assets/dummy_model.ckpt +1 -0
  5. endoreg_db/codemods/readme.md +88 -0
  6. endoreg_db/codemods/rename_datetime_fields.py +92 -0
  7. endoreg_db/config/env.py +101 -0
  8. endoreg_db/data/__init__.py +76 -4
  9. endoreg_db/data/ai_model/data.yaml +7 -0
  10. endoreg_db/data/{label → ai_model_label}/label/data.yaml +27 -1
  11. endoreg_db/data/ai_model_label/label/polyp_classification.yaml +52 -0
  12. endoreg_db/data/ai_model_label/label-set/data.yaml +40 -0
  13. endoreg_db/data/ai_model_label/label-set/polyp_classifications.yaml +25 -0
  14. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +27 -0
  15. endoreg_db/data/ai_model_video_segmentation_label/base_segmentation.yaml +176 -0
  16. endoreg_db/data/ai_model_video_segmentation_labelset/data.yaml +20 -0
  17. endoreg_db/data/center/data.yaml +40 -9
  18. endoreg_db/data/center_shift/ukw.yaml +9 -0
  19. endoreg_db/data/contraindication/bleeding.yaml +11 -0
  20. endoreg_db/data/db_summary.csv +58 -0
  21. endoreg_db/data/db_summary.xlsx +0 -0
  22. endoreg_db/data/disease/misc.yaml +1 -2
  23. endoreg_db/data/disease_classification/chronic_kidney_disease.yaml +2 -2
  24. endoreg_db/data/disease_classification_choice/chronic_kidney_disease.yaml +6 -6
  25. endoreg_db/data/distribution/numeric/data.yaml +14 -0
  26. endoreg_db/data/endoscope/data.yaml +93 -0
  27. endoreg_db/data/endoscopy_processor/data.yaml +3 -0
  28. endoreg_db/data/event/cardiology.yaml +0 -13
  29. endoreg_db/data/examination/examinations/data.yaml +34 -28
  30. endoreg_db/data/examination/type/data.yaml +12 -0
  31. endoreg_db/data/examination_indication/endoscopy.yaml +424 -0
  32. endoreg_db/data/examination_indication_classification/endoscopy.yaml +160 -0
  33. endoreg_db/data/examination_indication_classification_choice/endoscopy.yaml +101 -0
  34. endoreg_db/data/examination_requirement_set/colonoscopy.yaml +15 -0
  35. endoreg_db/data/finding/anatomy_colon.yaml +128 -0
  36. endoreg_db/data/finding/colonoscopy.yaml +40 -0
  37. endoreg_db/data/finding/colonoscopy_bowel_prep.yaml +56 -0
  38. endoreg_db/data/finding/complication.yaml +16 -0
  39. endoreg_db/data/finding/data.yaml +105 -0
  40. endoreg_db/data/finding/examination_setting.yaml +16 -0
  41. endoreg_db/data/finding/medication_related.yaml +18 -0
  42. endoreg_db/data/finding/outcome.yaml +12 -0
  43. endoreg_db/data/finding_classification/colonoscopy_bowel_preparation.yaml +95 -0
  44. endoreg_db/data/finding_classification/colonoscopy_jnet.yaml +22 -0
  45. endoreg_db/data/finding_classification/colonoscopy_kudo.yaml +25 -0
  46. endoreg_db/data/finding_classification/colonoscopy_lesion_circularity.yaml +20 -0
  47. endoreg_db/data/finding_classification/colonoscopy_lesion_planarity.yaml +24 -0
  48. endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +68 -0
  49. endoreg_db/data/finding_classification/colonoscopy_lesion_surface.yaml +20 -0
  50. endoreg_db/data/finding_classification/colonoscopy_location.yaml +80 -0
  51. endoreg_db/data/finding_classification/colonoscopy_lst.yaml +21 -0
  52. endoreg_db/data/finding_classification/colonoscopy_nice.yaml +20 -0
  53. endoreg_db/data/finding_classification/colonoscopy_paris.yaml +26 -0
  54. endoreg_db/data/finding_classification/colonoscopy_sano.yaml +22 -0
  55. endoreg_db/data/finding_classification/colonoscopy_summary.yaml +53 -0
  56. endoreg_db/data/finding_classification/complication_generic.yaml +25 -0
  57. endoreg_db/data/finding_classification/examination_setting_generic.yaml +40 -0
  58. endoreg_db/data/finding_classification/histology_colo.yaml +51 -0
  59. endoreg_db/data/finding_classification/intervention_required.yaml +26 -0
  60. endoreg_db/data/finding_classification/medication_related.yaml +23 -0
  61. endoreg_db/data/finding_classification/visualized.yaml +33 -0
  62. endoreg_db/data/finding_classification_choice/bowel_preparation.yaml +78 -0
  63. endoreg_db/data/finding_classification_choice/colon_lesion_circularity_default.yaml +32 -0
  64. endoreg_db/data/finding_classification_choice/colon_lesion_jnet.yaml +15 -0
  65. endoreg_db/data/finding_classification_choice/colon_lesion_kudo.yaml +23 -0
  66. endoreg_db/data/finding_classification_choice/colon_lesion_lst.yaml +15 -0
  67. endoreg_db/data/finding_classification_choice/colon_lesion_nice.yaml +17 -0
  68. endoreg_db/data/finding_classification_choice/colon_lesion_paris.yaml +57 -0
  69. endoreg_db/data/finding_classification_choice/colon_lesion_planarity_default.yaml +49 -0
  70. endoreg_db/data/finding_classification_choice/colon_lesion_sano.yaml +14 -0
  71. endoreg_db/data/finding_classification_choice/colon_lesion_surface_intact_default.yaml +36 -0
  72. endoreg_db/data/finding_classification_choice/colonoscopy_location.yaml +229 -0
  73. endoreg_db/data/finding_classification_choice/colonoscopy_not_complete_reason.yaml +19 -0
  74. endoreg_db/data/finding_classification_choice/colonoscopy_size.yaml +82 -0
  75. endoreg_db/data/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +15 -0
  76. endoreg_db/data/finding_classification_choice/complication_generic_types.yaml +15 -0
  77. endoreg_db/data/finding_classification_choice/examination_setting_generic_types.yaml +15 -0
  78. endoreg_db/data/finding_classification_choice/histology.yaml +24 -0
  79. endoreg_db/data/finding_classification_choice/histology_polyp.yaml +20 -0
  80. endoreg_db/data/finding_classification_choice/outcome.yaml +19 -0
  81. endoreg_db/data/finding_classification_choice/yes_no_na.yaml +11 -0
  82. endoreg_db/data/finding_classification_type/colonoscopy_basic.yaml +48 -0
  83. endoreg_db/data/finding_intervention/endoscopy.yaml +43 -0
  84. endoreg_db/data/finding_intervention/endoscopy_colonoscopy.yaml +168 -0
  85. endoreg_db/data/finding_intervention/endoscopy_egd.yaml +128 -0
  86. endoreg_db/data/finding_intervention/endoscopy_ercp.yaml +32 -0
  87. endoreg_db/data/finding_intervention/endoscopy_eus_lower.yaml +9 -0
  88. endoreg_db/data/finding_intervention/endoscopy_eus_upper.yaml +36 -0
  89. endoreg_db/data/finding_intervention_type/endoscopy.yaml +15 -0
  90. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +79 -0
  91. endoreg_db/data/finding_type/data.yaml +43 -0
  92. endoreg_db/data/gender/data.yaml +24 -0
  93. endoreg_db/data/information_source/annotation.yaml +6 -0
  94. endoreg_db/data/information_source/endoscopy_guidelines.yaml +7 -0
  95. endoreg_db/data/information_source/prediction.yaml +7 -0
  96. endoreg_db/data/information_source_type/data.yaml +8 -0
  97. endoreg_db/data/lab_value/cardiac_enzymes.yaml +7 -1
  98. endoreg_db/data/lab_value/coagulation.yaml +6 -1
  99. endoreg_db/data/lab_value/electrolytes.yaml +39 -1
  100. endoreg_db/data/lab_value/gastrointestinal_function.yaml +12 -0
  101. endoreg_db/data/lab_value/hematology.yaml +17 -2
  102. endoreg_db/data/lab_value/hormones.yaml +6 -0
  103. endoreg_db/data/lab_value/lipids.yaml +12 -3
  104. endoreg_db/data/lab_value/misc.yaml +48 -2
  105. endoreg_db/data/lab_value/renal_function.yaml +2 -1
  106. endoreg_db/data/lx_client_tag/base.yaml +54 -0
  107. endoreg_db/data/lx_client_type/base.yaml +30 -0
  108. endoreg_db/data/lx_permission/base.yaml +24 -0
  109. endoreg_db/data/lx_permission/endoreg.yaml +52 -0
  110. endoreg_db/data/medication/anticoagulation.yaml +5 -5
  111. endoreg_db/data/medication/tah.yaml +5 -5
  112. endoreg_db/data/medication_indication/anticoagulation.yaml +48 -53
  113. endoreg_db/data/medication_intake_time/base.yaml +4 -4
  114. endoreg_db/data/names_first/first_names.yaml +54 -0
  115. endoreg_db/data/names_last/last_names.yaml +51 -0
  116. endoreg_db/data/network_device/data.yaml +30 -0
  117. endoreg_db/data/organ/data.yaml +29 -0
  118. endoreg_db/data/pdf_type/data.yaml +27 -9
  119. endoreg_db/data/qualification/endoscopy.yaml +36 -0
  120. endoreg_db/data/qualification/m2.yaml +39 -0
  121. endoreg_db/data/qualification/outpatient_clinic.yaml +35 -0
  122. endoreg_db/data/qualification/sonography.yaml +36 -0
  123. endoreg_db/data/qualification_type/base.yaml +29 -0
  124. endoreg_db/data/report_reader_flag/rkh-histology-generic.yaml +10 -0
  125. endoreg_db/data/report_reader_flag/ukw-examination-generic.yaml +4 -0
  126. endoreg_db/data/report_reader_flag/ukw-histology-generic.yaml +5 -0
  127. endoreg_db/data/requirement/age.yaml +26 -0
  128. endoreg_db/data/requirement/colonoscopy_baseline_austria.yaml +45 -0
  129. endoreg_db/data/requirement/disease_cardiovascular.yaml +79 -0
  130. endoreg_db/data/requirement/disease_classification_choice_cardiovascular.yaml +41 -0
  131. endoreg_db/data/requirement/disease_hepatology.yaml +12 -0
  132. endoreg_db/data/requirement/disease_misc.yaml +12 -0
  133. endoreg_db/data/requirement/disease_renal.yaml +96 -0
  134. endoreg_db/data/requirement/endoscopy_bleeding_risk.yaml +59 -0
  135. endoreg_db/data/requirement/event_cardiology.yaml +251 -0
  136. endoreg_db/data/requirement/event_requirements.yaml +145 -0
  137. endoreg_db/data/requirement/finding_colon_polyp.yaml +50 -0
  138. endoreg_db/data/requirement/gender.yaml +25 -0
  139. endoreg_db/data/requirement/lab_value.yaml +441 -0
  140. endoreg_db/data/requirement/medication.yaml +93 -0
  141. endoreg_db/data/requirement_operator/age.yaml +13 -0
  142. endoreg_db/data/requirement_operator/lab_operators.yaml +129 -0
  143. endoreg_db/data/requirement_operator/model_operators.yaml +96 -0
  144. endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +48 -0
  145. endoreg_db/data/requirement_set/colonoscopy_austria_screening.yaml +57 -0
  146. endoreg_db/data/requirement_set/endoscopy_bleeding_risk.yaml +52 -0
  147. endoreg_db/data/requirement_set_type/data.yaml +20 -0
  148. endoreg_db/data/requirement_type/requirement_types.yaml +165 -0
  149. endoreg_db/data/risk/bleeding.yaml +26 -0
  150. endoreg_db/data/risk/thrombosis.yaml +37 -0
  151. endoreg_db/data/risk_type/data.yaml +27 -0
  152. endoreg_db/data/setup_config.yaml +38 -0
  153. endoreg_db/data/shift/endoscopy.yaml +21 -0
  154. endoreg_db/data/shift_type/base.yaml +35 -0
  155. endoreg_db/data/tag/requirement_set_tags.yaml +11 -0
  156. endoreg_db/data/unit/concentration.yaml +23 -0
  157. endoreg_db/data/unit/time.yaml +36 -1
  158. endoreg_db/exceptions.py +19 -0
  159. endoreg_db/forms/__init__.py +3 -1
  160. endoreg_db/forms/examination_form.py +11 -0
  161. endoreg_db/forms/patient_finding_intervention_form.py +18 -0
  162. endoreg_db/forms/patient_form.py +27 -0
  163. endoreg_db/forms/questionnaires/__init__.py +1 -1
  164. endoreg_db/forms/questionnaires/tto_questionnaire.py +19 -19
  165. endoreg_db/helpers/count_db.py +45 -0
  166. endoreg_db/helpers/data_loader.py +208 -0
  167. endoreg_db/helpers/default_objects.py +378 -0
  168. endoreg_db/helpers/download_segmentation_model.py +31 -0
  169. endoreg_db/helpers/interact.py +6 -0
  170. endoreg_db/helpers/test_video_helper.py +119 -0
  171. endoreg_db/logger_conf.py +140 -0
  172. endoreg_db/management/__init__.py +1 -0
  173. endoreg_db/management/commands/__init__.py +1 -0
  174. endoreg_db/management/commands/anonymize_video.py +0 -0
  175. endoreg_db/management/commands/check_auth.py +125 -0
  176. endoreg_db/management/commands/create_model_meta_from_huggingface.py +115 -0
  177. endoreg_db/management/commands/create_multilabel_model_meta.py +214 -0
  178. endoreg_db/management/commands/fix_missing_patient_data.py +172 -0
  179. endoreg_db/management/commands/fix_video_paths.py +165 -0
  180. endoreg_db/management/commands/import_fallback_video.py +203 -0
  181. endoreg_db/management/commands/import_report.py +298 -0
  182. endoreg_db/management/commands/import_video.py +423 -0
  183. endoreg_db/management/commands/import_video_with_classification.py +367 -0
  184. endoreg_db/management/commands/init_default_ai_model.py +112 -0
  185. endoreg_db/management/commands/load_ai_model_data.py +58 -26
  186. endoreg_db/management/commands/load_ai_model_label_data.py +59 -0
  187. endoreg_db/management/commands/load_base_db_data.py +174 -118
  188. endoreg_db/management/commands/load_center_data.py +46 -21
  189. endoreg_db/management/commands/{load_logging_data.py → load_contraindication_data.py} +4 -2
  190. endoreg_db/management/commands/load_disease_data.py +29 -7
  191. endoreg_db/management/commands/{load_endoscope_type_data.py → load_endoscope_data.py} +30 -7
  192. endoreg_db/management/commands/load_examination_indication_data.py +86 -0
  193. endoreg_db/management/commands/load_finding_data.py +128 -0
  194. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +0 -1
  195. endoreg_db/management/commands/load_information_source.py +13 -7
  196. endoreg_db/management/commands/load_lab_value_data.py +3 -3
  197. endoreg_db/management/commands/load_medication_data.py +83 -21
  198. endoreg_db/management/commands/load_name_data.py +37 -0
  199. endoreg_db/management/commands/{load_medication_intake_time_data.py → load_organ_data.py} +7 -5
  200. endoreg_db/management/commands/load_qualification_data.py +59 -0
  201. endoreg_db/management/commands/load_requirement_data.py +180 -0
  202. endoreg_db/management/commands/load_risk_data.py +56 -0
  203. endoreg_db/management/commands/load_shift_data.py +60 -0
  204. endoreg_db/management/commands/load_tag_data.py +57 -0
  205. endoreg_db/management/commands/register_ai_model.py +1 -1
  206. endoreg_db/management/commands/setup_endoreg_db.py +381 -0
  207. endoreg_db/management/commands/start_filewatcher.py +106 -0
  208. endoreg_db/management/commands/storage_management.py +548 -0
  209. endoreg_db/management/commands/summarize_db_content.py +189 -0
  210. endoreg_db/management/commands/validate_video.py +204 -0
  211. endoreg_db/management/commands/validate_video_files.py +161 -0
  212. endoreg_db/management/commands/video_validation.py +22 -0
  213. endoreg_db/mermaid/Overall_flow_patient_finding_intervention.md +10 -0
  214. endoreg_db/mermaid/anonymized_image_annotation.md +20 -0
  215. endoreg_db/mermaid/binary_classification_annotation.md +50 -0
  216. endoreg_db/mermaid/classification.md +8 -0
  217. endoreg_db/mermaid/examination.md +8 -0
  218. endoreg_db/mermaid/findings.md +7 -0
  219. endoreg_db/mermaid/image_classification.md +28 -0
  220. endoreg_db/mermaid/interventions.md +8 -0
  221. endoreg_db/mermaid/morphology.md +8 -0
  222. endoreg_db/mermaid/patient_creation.md +14 -0
  223. endoreg_db/mermaid/video_segmentation_annotation.md +17 -0
  224. endoreg_db/migrations/0001_initial.py +1234 -944
  225. endoreg_db/migrations/0002_add_video_correction_models.py +52 -0
  226. endoreg_db/migrations/0003_add_center_display_name.py +30 -0
  227. endoreg_db/models/__init__.py +339 -53
  228. endoreg_db/models/administration/__init__.py +116 -0
  229. endoreg_db/models/administration/ai/__init__.py +9 -0
  230. endoreg_db/models/administration/ai/active_model.py +35 -0
  231. endoreg_db/models/administration/ai/ai_model.py +156 -0
  232. endoreg_db/models/{ai_model → administration/ai}/model_type.py +19 -4
  233. endoreg_db/models/administration/case/__init__.py +19 -0
  234. endoreg_db/models/administration/case/case.py +114 -0
  235. endoreg_db/models/{case_template → administration/case/case_template}/__init__.py +10 -1
  236. endoreg_db/models/{case_template → administration/case/case_template}/case_template.py +60 -16
  237. endoreg_db/models/{case_template → administration/case/case_template}/case_template_rule.py +6 -13
  238. endoreg_db/models/{case_template → administration/case/case_template}/case_template_rule_value.py +21 -8
  239. endoreg_db/models/{case_template → administration/case/case_template}/case_template_type.py +1 -3
  240. endoreg_db/models/{center → administration/center}/__init__.py +9 -0
  241. endoreg_db/models/administration/center/center.py +67 -0
  242. endoreg_db/models/administration/center/center_product.py +64 -0
  243. endoreg_db/models/administration/center/center_resource.py +49 -0
  244. endoreg_db/models/administration/center/center_shift.py +88 -0
  245. endoreg_db/models/administration/center/center_waste.py +30 -0
  246. endoreg_db/models/administration/permissions/__init__.py +44 -0
  247. endoreg_db/models/administration/person/__init__.py +24 -0
  248. endoreg_db/models/administration/person/employee/__init__.py +3 -0
  249. endoreg_db/models/administration/person/employee/employee.py +35 -0
  250. endoreg_db/models/administration/person/employee/employee_qualification.py +39 -0
  251. endoreg_db/models/administration/person/employee/employee_type.py +42 -0
  252. endoreg_db/models/administration/person/examiner/__init__.py +4 -0
  253. endoreg_db/models/administration/person/examiner/examiner.py +54 -0
  254. endoreg_db/models/administration/person/names/__init__.py +0 -0
  255. endoreg_db/models/{persons → administration/person/names}/first_name.py +1 -1
  256. endoreg_db/models/{persons → administration/person/names}/last_name.py +2 -3
  257. endoreg_db/models/administration/person/patient/__init__.py +5 -0
  258. endoreg_db/models/administration/person/patient/patient.py +460 -0
  259. endoreg_db/models/administration/person/profession/__init__.py +24 -0
  260. endoreg_db/models/administration/person/user/__init__.py +5 -0
  261. endoreg_db/models/administration/person/user/portal_user_information.py +37 -0
  262. endoreg_db/models/administration/product/__init__.py +14 -0
  263. endoreg_db/models/administration/product/product.py +97 -0
  264. endoreg_db/models/administration/product/product_group.py +39 -0
  265. endoreg_db/models/administration/product/product_material.py +54 -0
  266. endoreg_db/models/{product → administration/product}/product_weight.py +21 -0
  267. endoreg_db/models/{product → administration/product}/reference_product.py +44 -13
  268. endoreg_db/models/administration/qualification/__init__.py +7 -0
  269. endoreg_db/models/administration/qualification/qualification.py +37 -0
  270. endoreg_db/models/administration/qualification/qualification_type.py +35 -0
  271. endoreg_db/models/administration/shift/__init__.py +9 -0
  272. endoreg_db/models/administration/shift/scheduled_days.py +69 -0
  273. endoreg_db/models/administration/shift/shift.py +51 -0
  274. endoreg_db/models/administration/shift/shift_type.py +108 -0
  275. endoreg_db/models/label/__init__.py +24 -1
  276. endoreg_db/models/label/annotation/__init__.py +12 -0
  277. endoreg_db/models/label/annotation/image_classification.py +84 -0
  278. endoreg_db/models/label/annotation/video_segmentation_annotation.py +66 -0
  279. endoreg_db/models/label/label.py +53 -54
  280. endoreg_db/models/label/label_set.py +53 -0
  281. endoreg_db/models/label/label_type.py +29 -0
  282. endoreg_db/models/label/label_video_segment/__init__.py +3 -0
  283. endoreg_db/models/label/label_video_segment/_create_from_video.py +41 -0
  284. endoreg_db/models/label/label_video_segment/label_video_segment.py +511 -0
  285. endoreg_db/models/label/video_segmentation_label.py +31 -0
  286. endoreg_db/models/label/video_segmentation_labelset.py +27 -0
  287. endoreg_db/models/media/__init__.py +16 -0
  288. endoreg_db/models/media/frame/__init__.py +3 -0
  289. endoreg_db/models/media/frame/frame.py +111 -0
  290. endoreg_db/models/media/pdf/__init__.py +11 -0
  291. endoreg_db/models/media/pdf/raw_pdf.py +757 -0
  292. endoreg_db/models/media/pdf/report_file.py +162 -0
  293. endoreg_db/models/media/pdf/report_reader/__init__.py +7 -0
  294. endoreg_db/models/media/pdf/report_reader/report_reader_config.py +77 -0
  295. endoreg_db/models/media/video/__init__.py +8 -0
  296. endoreg_db/models/media/video/create_from_file.py +358 -0
  297. endoreg_db/models/media/video/pipe_1.py +213 -0
  298. endoreg_db/models/media/video/pipe_2.py +105 -0
  299. endoreg_db/models/media/video/refactor_plan.md +0 -0
  300. endoreg_db/models/media/video/video_file.py +825 -0
  301. endoreg_db/models/media/video/video_file_ai.py +443 -0
  302. endoreg_db/models/media/video/video_file_anonymize.py +349 -0
  303. endoreg_db/models/media/video/video_file_frames/__init__.py +47 -0
  304. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +22 -0
  305. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +23 -0
  306. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +104 -0
  307. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +174 -0
  308. endoreg_db/models/media/video/video_file_frames/_get_frame.py +28 -0
  309. endoreg_db/models/media/video/video_file_frames/_get_frame_number.py +27 -0
  310. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +20 -0
  311. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +27 -0
  312. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +34 -0
  313. endoreg_db/models/media/video/video_file_frames/_get_frames.py +27 -0
  314. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +129 -0
  315. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +141 -0
  316. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +65 -0
  317. endoreg_db/models/media/video/video_file_frames.py +0 -0
  318. endoreg_db/models/media/video/video_file_io.py +168 -0
  319. endoreg_db/models/media/video/video_file_meta/__init__.py +22 -0
  320. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +45 -0
  321. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +39 -0
  322. endoreg_db/models/media/video/video_file_meta/get_fps.py +147 -0
  323. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +143 -0
  324. endoreg_db/models/media/video/video_file_meta/text_meta.py +134 -0
  325. endoreg_db/models/media/video/video_file_meta/video_meta.py +70 -0
  326. endoreg_db/models/media/video/video_file_segments.py +209 -0
  327. endoreg_db/models/media/video/video_metadata.py +65 -0
  328. endoreg_db/models/media/video/video_processing.py +152 -0
  329. endoreg_db/models/medical/__init__.py +146 -0
  330. endoreg_db/models/medical/contraindication/__init__.py +17 -0
  331. endoreg_db/models/medical/disease.py +156 -0
  332. endoreg_db/models/medical/event.py +137 -0
  333. endoreg_db/models/medical/examination/__init__.py +9 -0
  334. endoreg_db/models/medical/examination/examination.py +148 -0
  335. endoreg_db/models/medical/examination/examination_indication.py +278 -0
  336. endoreg_db/models/medical/examination/examination_time.py +49 -0
  337. endoreg_db/models/medical/examination/examination_time_type.py +41 -0
  338. endoreg_db/models/medical/examination/examination_type.py +48 -0
  339. endoreg_db/models/medical/finding/__init__.py +18 -0
  340. endoreg_db/models/medical/finding/finding.py +96 -0
  341. endoreg_db/models/medical/finding/finding_classification.py +142 -0
  342. endoreg_db/models/medical/finding/finding_intervention.py +52 -0
  343. endoreg_db/models/medical/finding/finding_type.py +35 -0
  344. endoreg_db/models/medical/hardware/__init__.py +8 -0
  345. endoreg_db/models/medical/hardware/endoscope.py +65 -0
  346. endoreg_db/models/{hardware → medical/hardware}/endoscopy_processor.py +68 -29
  347. endoreg_db/models/medical/laboratory/__init__.py +5 -0
  348. endoreg_db/models/medical/laboratory/lab_value.py +419 -0
  349. endoreg_db/models/medical/medication/__init__.py +19 -0
  350. endoreg_db/models/medical/medication/medication.py +31 -0
  351. endoreg_db/models/medical/medication/medication_indication.py +50 -0
  352. endoreg_db/models/medical/medication/medication_indication_type.py +39 -0
  353. endoreg_db/models/medical/medication/medication_intake_time.py +44 -0
  354. endoreg_db/models/medical/medication/medication_schedule.py +45 -0
  355. endoreg_db/models/medical/organ/__init__.py +35 -0
  356. endoreg_db/models/medical/patient/__init__.py +56 -0
  357. endoreg_db/models/medical/patient/medication_examples.py +38 -0
  358. endoreg_db/models/medical/patient/patient_disease.py +63 -0
  359. endoreg_db/models/medical/patient/patient_event.py +75 -0
  360. endoreg_db/models/medical/patient/patient_examination.py +249 -0
  361. endoreg_db/models/medical/patient/patient_examination_indication.py +44 -0
  362. endoreg_db/models/medical/patient/patient_finding.py +357 -0
  363. endoreg_db/models/medical/patient/patient_finding_classification.py +207 -0
  364. endoreg_db/models/medical/patient/patient_finding_intervention.py +40 -0
  365. endoreg_db/models/medical/patient/patient_lab_sample.py +148 -0
  366. endoreg_db/models/{persons → medical}/patient/patient_lab_value.py +68 -22
  367. endoreg_db/models/medical/patient/patient_medication.py +104 -0
  368. endoreg_db/models/medical/patient/patient_medication_schedule.py +136 -0
  369. endoreg_db/models/medical/risk/__init__.py +7 -0
  370. endoreg_db/models/medical/risk/risk.py +72 -0
  371. endoreg_db/models/medical/risk/risk_type.py +51 -0
  372. endoreg_db/models/metadata/__init__.py +19 -0
  373. endoreg_db/models/metadata/frame_ocr_result.py +0 -0
  374. endoreg_db/models/metadata/model_meta.py +206 -0
  375. endoreg_db/models/metadata/model_meta_logic.py +343 -0
  376. endoreg_db/models/{data_file/metadata → metadata}/pdf_meta.py +32 -13
  377. endoreg_db/models/metadata/sensitive_meta.py +288 -0
  378. endoreg_db/models/metadata/sensitive_meta_logic.py +1048 -0
  379. endoreg_db/models/metadata/video_meta.py +332 -0
  380. endoreg_db/models/metadata/video_prediction_logic.py +190 -0
  381. endoreg_db/models/metadata/video_prediction_meta.py +270 -0
  382. endoreg_db/models/other/__init__.py +36 -1
  383. endoreg_db/models/other/distribution/__init__.py +44 -0
  384. endoreg_db/models/other/distribution/base_value_distribution.py +20 -0
  385. endoreg_db/models/other/distribution/date_value_distribution.py +89 -0
  386. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +32 -0
  387. endoreg_db/models/other/distribution/numeric_value_distribution.py +125 -0
  388. endoreg_db/models/other/distribution/single_categorical_value_distribution.py +22 -0
  389. endoreg_db/models/other/emission/__init__.py +5 -0
  390. endoreg_db/models/other/emission/emission_factor.py +94 -0
  391. endoreg_db/models/{persons → other}/gender.py +8 -3
  392. endoreg_db/models/other/information_source.py +159 -0
  393. endoreg_db/models/other/material.py +14 -2
  394. endoreg_db/models/other/resource.py +6 -2
  395. endoreg_db/models/other/tag.py +27 -0
  396. endoreg_db/models/other/transport_route.py +15 -3
  397. endoreg_db/models/{unit.py → other/unit.py} +16 -6
  398. endoreg_db/models/other/waste.py +10 -3
  399. endoreg_db/models/requirement/__init__.py +11 -0
  400. endoreg_db/models/requirement/requirement.py +767 -0
  401. endoreg_db/models/requirement/requirement_evaluation/__init__.py +6 -0
  402. endoreg_db/models/requirement/requirement_evaluation/get_values.py +40 -0
  403. endoreg_db/models/requirement/requirement_evaluation/operator_evaluation_models.py +9 -0
  404. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +95 -0
  405. endoreg_db/models/requirement/requirement_operator.py +176 -0
  406. endoreg_db/models/requirement/requirement_set.py +287 -0
  407. endoreg_db/models/rule/__init__.py +13 -0
  408. endoreg_db/models/{rules → rule}/rule.py +6 -3
  409. endoreg_db/models/{rules → rule}/rule_attribute_dtype.py +0 -2
  410. endoreg_db/models/{rules → rule}/rule_type.py +0 -2
  411. endoreg_db/models/{rules → rule}/ruleset.py +0 -2
  412. endoreg_db/models/state/__init__.py +12 -0
  413. endoreg_db/models/state/abstract.py +11 -0
  414. endoreg_db/models/state/audit_ledger.py +150 -0
  415. endoreg_db/models/state/label_video_segment.py +22 -0
  416. endoreg_db/models/state/raw_pdf.py +187 -0
  417. endoreg_db/models/state/sensitive_meta.py +46 -0
  418. endoreg_db/models/state/video.py +232 -0
  419. endoreg_db/models/upload_job.py +99 -0
  420. endoreg_db/models/utils.py +135 -0
  421. endoreg_db/renames.yml +8 -0
  422. endoreg_db/root_urls.py +9 -0
  423. endoreg_db/schemas/__init__.py +0 -0
  424. endoreg_db/schemas/examination_evaluation.py +27 -0
  425. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +775 -0
  426. endoreg_db/serializers/__init__.py +118 -10
  427. endoreg_db/serializers/_old/raw_pdf_meta_validation.py +223 -0
  428. endoreg_db/serializers/_old/raw_video_meta_validation.py +179 -0
  429. endoreg_db/serializers/_old/video.py +71 -0
  430. endoreg_db/serializers/administration/__init__.py +14 -0
  431. endoreg_db/serializers/administration/ai/__init__.py +10 -0
  432. endoreg_db/serializers/administration/ai/active_model.py +10 -0
  433. endoreg_db/serializers/administration/ai/ai_model.py +18 -0
  434. endoreg_db/serializers/administration/ai/model_type.py +10 -0
  435. endoreg_db/serializers/administration/center.py +9 -0
  436. endoreg_db/serializers/administration/gender.py +9 -0
  437. endoreg_db/serializers/anonymization.py +69 -0
  438. endoreg_db/serializers/evaluation/examination_evaluation.py +1 -0
  439. endoreg_db/serializers/examination/__init__.py +10 -0
  440. endoreg_db/serializers/examination/base.py +46 -0
  441. endoreg_db/serializers/examination/dropdown.py +21 -0
  442. endoreg_db/serializers/examination_serializer.py +12 -0
  443. endoreg_db/serializers/finding/__init__.py +5 -0
  444. endoreg_db/serializers/finding/finding.py +54 -0
  445. endoreg_db/serializers/finding_classification/__init__.py +7 -0
  446. endoreg_db/serializers/finding_classification/choice.py +19 -0
  447. endoreg_db/serializers/finding_classification/classification.py +13 -0
  448. endoreg_db/serializers/label/__init__.py +7 -0
  449. endoreg_db/serializers/label/image_classification_annotation.py +62 -0
  450. endoreg_db/serializers/label/label.py +15 -0
  451. endoreg_db/serializers/label_video_segment/__init__.py +7 -0
  452. endoreg_db/serializers/label_video_segment/_lvs_create.py +149 -0
  453. endoreg_db/serializers/label_video_segment/_lvs_update.py +138 -0
  454. endoreg_db/serializers/label_video_segment/_lvs_validate.py +149 -0
  455. endoreg_db/serializers/label_video_segment/label_video_segment.py +344 -0
  456. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +99 -0
  457. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +163 -0
  458. endoreg_db/serializers/meta/__init__.py +19 -0
  459. endoreg_db/serializers/meta/pdf_file_meta_extraction.py +115 -0
  460. endoreg_db/serializers/meta/report_meta.py +53 -0
  461. endoreg_db/serializers/meta/sensitive_meta_detail.py +162 -0
  462. endoreg_db/serializers/meta/sensitive_meta_update.py +148 -0
  463. endoreg_db/serializers/meta/sensitive_meta_verification.py +59 -0
  464. endoreg_db/serializers/meta/video_meta.py +39 -0
  465. endoreg_db/serializers/misc/__init__.py +14 -0
  466. endoreg_db/serializers/misc/file_overview.py +182 -0
  467. endoreg_db/serializers/misc/sensitive_patient_data.py +120 -0
  468. endoreg_db/serializers/misc/stats.py +33 -0
  469. endoreg_db/serializers/misc/translatable_field_mix_in.py +44 -0
  470. endoreg_db/serializers/misc/upload_job.py +71 -0
  471. endoreg_db/serializers/patient/__init__.py +11 -0
  472. endoreg_db/serializers/patient/patient.py +86 -0
  473. endoreg_db/serializers/patient/patient_dropdown.py +27 -0
  474. endoreg_db/serializers/patient_examination/__init__.py +7 -0
  475. endoreg_db/serializers/patient_examination/patient_examination.py +141 -0
  476. endoreg_db/serializers/patient_finding/__init__.py +15 -0
  477. endoreg_db/serializers/patient_finding/patient_finding.py +31 -0
  478. endoreg_db/serializers/patient_finding/patient_finding_classification.py +39 -0
  479. endoreg_db/serializers/patient_finding/patient_finding_detail.py +53 -0
  480. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +26 -0
  481. endoreg_db/serializers/patient_finding/patient_finding_list.py +41 -0
  482. endoreg_db/serializers/patient_finding/patient_finding_write.py +126 -0
  483. endoreg_db/serializers/pdf/__init__.py +5 -0
  484. endoreg_db/serializers/pdf/anony_text_validation.py +85 -0
  485. endoreg_db/serializers/report/__init__.py +9 -0
  486. endoreg_db/serializers/report/mixins.py +45 -0
  487. endoreg_db/serializers/report/report.py +105 -0
  488. endoreg_db/serializers/report/report_list.py +22 -0
  489. endoreg_db/serializers/report/secure_file_url.py +26 -0
  490. endoreg_db/serializers/requirements/requirement_schema.py +25 -0
  491. endoreg_db/serializers/requirements/requirement_sets.py +29 -0
  492. endoreg_db/serializers/sensitive_meta_serializer.py +282 -0
  493. endoreg_db/serializers/video/__init__.py +7 -0
  494. endoreg_db/serializers/video/segmentation.py +263 -0
  495. endoreg_db/serializers/video/video_file_brief.py +10 -0
  496. endoreg_db/serializers/video/video_file_detail.py +83 -0
  497. endoreg_db/serializers/video/video_file_list.py +67 -0
  498. endoreg_db/serializers/video/video_metadata.py +105 -0
  499. endoreg_db/serializers/video/video_processing_history.py +153 -0
  500. endoreg_db/serializers/video_examination.py +198 -0
  501. endoreg_db/services/__init__.py +5 -0
  502. endoreg_db/services/anonymization.py +223 -0
  503. endoreg_db/services/examination_evaluation.py +149 -0
  504. endoreg_db/services/finding_description_service.py +0 -0
  505. endoreg_db/services/lookup_service.py +411 -0
  506. endoreg_db/services/lookup_store.py +266 -0
  507. endoreg_db/services/pdf_import.py +1382 -0
  508. endoreg_db/services/polling_coordinator.py +288 -0
  509. endoreg_db/services/pseudonym_service.py +89 -0
  510. endoreg_db/services/requirements_object.py +147 -0
  511. endoreg_db/services/segment_sync.py +155 -0
  512. endoreg_db/services/storage_aware_video_processor.py +344 -0
  513. endoreg_db/services/video_import.py +1259 -0
  514. endoreg_db/tasks/upload_tasks.py +207 -0
  515. endoreg_db/tasks/video_ingest.py +157 -0
  516. endoreg_db/tasks/video_processing_tasks.py +327 -0
  517. endoreg_db/templates/admin/patient_finding_intervention.html +253 -0
  518. endoreg_db/templates/admin/start_examination.html +12 -0
  519. endoreg_db/templates/timeline.html +176 -0
  520. endoreg_db/urls/__init__.py +83 -0
  521. endoreg_db/urls/anonymization.py +32 -0
  522. endoreg_db/urls/auth.py +16 -0
  523. endoreg_db/urls/classification.py +39 -0
  524. endoreg_db/urls/examination.py +54 -0
  525. endoreg_db/urls/files.py +6 -0
  526. endoreg_db/urls/label_video_segment_validate.py +33 -0
  527. endoreg_db/urls/label_video_segments.py +46 -0
  528. endoreg_db/urls/media.py +227 -0
  529. endoreg_db/urls/patient.py +19 -0
  530. endoreg_db/urls/report.py +48 -0
  531. endoreg_db/urls/requirements.py +13 -0
  532. endoreg_db/urls/sensitive_meta.py +0 -0
  533. endoreg_db/urls/stats.py +46 -0
  534. endoreg_db/urls/upload.py +20 -0
  535. endoreg_db/urls/video.py +61 -0
  536. endoreg_db/urls.py +9 -0
  537. endoreg_db/utils/__init__.py +88 -1
  538. endoreg_db/utils/ai/__init__.py +9 -0
  539. endoreg_db/{models/ai_model/utils.py → utils/ai/get.py} +1 -4
  540. endoreg_db/utils/ai/inference_dataset.py +52 -0
  541. endoreg_db/utils/ai/multilabel_classification_net.py +159 -0
  542. endoreg_db/utils/ai/postprocess.py +63 -0
  543. endoreg_db/utils/ai/predict.py +291 -0
  544. endoreg_db/utils/ai/preprocess.py +68 -0
  545. endoreg_db/utils/calc_duration_seconds.py +24 -0
  546. endoreg_db/utils/case_generator/__init__.py +0 -0
  547. endoreg_db/utils/case_generator/case_generator.py +159 -0
  548. endoreg_db/utils/case_generator/lab_sample_factory.py +33 -0
  549. endoreg_db/utils/case_generator/utils.py +30 -0
  550. endoreg_db/utils/check_video_files.py +148 -0
  551. endoreg_db/utils/dataloader.py +118 -35
  552. endoreg_db/utils/dates.py +60 -0
  553. endoreg_db/utils/env.py +33 -0
  554. endoreg_db/utils/extract_specific_frames.py +72 -0
  555. endoreg_db/utils/file_operations.py +29 -1
  556. endoreg_db/utils/fix_video_path_direct.py +141 -0
  557. endoreg_db/utils/frame_anonymization_utils.py +463 -0
  558. endoreg_db/utils/hashs.py +123 -4
  559. endoreg_db/utils/links/__init__.py +0 -0
  560. endoreg_db/utils/links/requirement_link.py +193 -0
  561. endoreg_db/utils/mime_types.py +0 -0
  562. endoreg_db/utils/names.py +76 -0
  563. endoreg_db/utils/parse_and_generate_yaml.py +46 -0
  564. endoreg_db/utils/paths.py +95 -0
  565. endoreg_db/utils/permissions.py +143 -0
  566. endoreg_db/utils/pipelines/Readme.md +235 -0
  567. endoreg_db/utils/pipelines/__init__.py +0 -0
  568. endoreg_db/utils/pipelines/process_video_dir.py +120 -0
  569. endoreg_db/utils/product/__init__.py +0 -0
  570. endoreg_db/utils/product/sum_emissions.py +20 -0
  571. endoreg_db/utils/product/sum_weights.py +18 -0
  572. endoreg_db/utils/pydantic_models/__init__.py +6 -0
  573. endoreg_db/utils/pydantic_models/db_config.py +57 -0
  574. endoreg_db/utils/requirement_helpers.py +0 -0
  575. endoreg_db/utils/requirement_operator_logic/__init__.py +0 -0
  576. endoreg_db/utils/requirement_operator_logic/lab_value_operators.py +578 -0
  577. endoreg_db/utils/requirement_operator_logic/model_evaluators.py +368 -0
  578. endoreg_db/utils/setup_config.py +177 -0
  579. endoreg_db/utils/translation.py +27 -0
  580. endoreg_db/utils/validate_endo_roi.py +19 -0
  581. endoreg_db/utils/validate_subcategory_dict.py +91 -0
  582. endoreg_db/utils/validate_video_detailed.py +357 -0
  583. endoreg_db/utils/video/__init__.py +26 -0
  584. endoreg_db/utils/video/extract_frames.py +88 -0
  585. endoreg_db/utils/video/ffmpeg_wrapper.py +835 -0
  586. endoreg_db/utils/video/names.py +42 -0
  587. endoreg_db/utils/video/streaming_processor.py +312 -0
  588. endoreg_db/utils/video/video_splitter.py +94 -0
  589. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +238 -0
  590. endoreg_db/views/__init__.py +274 -0
  591. endoreg_db/views/anonymization/__init__.py +27 -0
  592. endoreg_db/views/anonymization/media_management.py +454 -0
  593. endoreg_db/views/anonymization/overview.py +216 -0
  594. endoreg_db/views/anonymization/validate.py +107 -0
  595. endoreg_db/views/auth/__init__.py +13 -0
  596. endoreg_db/views/auth/keycloak.py +113 -0
  597. endoreg_db/views/examination/__init__.py +33 -0
  598. endoreg_db/views/examination/examination.py +37 -0
  599. endoreg_db/views/examination/examination_manifest_cache.py +26 -0
  600. endoreg_db/views/examination/get_finding_classification_choices.py +59 -0
  601. endoreg_db/views/examination/get_finding_classifications.py +36 -0
  602. endoreg_db/views/examination/get_findings.py +41 -0
  603. endoreg_db/views/examination/get_instruments.py +18 -0
  604. endoreg_db/views/examination/get_interventions.py +14 -0
  605. endoreg_db/views/finding/__init__.py +9 -0
  606. endoreg_db/views/finding/finding.py +112 -0
  607. endoreg_db/views/finding/get_classifications.py +14 -0
  608. endoreg_db/views/finding/get_interventions.py +17 -0
  609. endoreg_db/views/finding_classification/__init__.py +13 -0
  610. endoreg_db/views/finding_classification/base.py +0 -0
  611. endoreg_db/views/finding_classification/finding_classification.py +42 -0
  612. endoreg_db/views/finding_classification/get_classification_choices.py +55 -0
  613. endoreg_db/views/label/__init__.py +5 -0
  614. endoreg_db/views/label/label.py +15 -0
  615. endoreg_db/views/label_video_segment/__init__.py +16 -0
  616. endoreg_db/views/label_video_segment/create_lvs_from_annotation.py +44 -0
  617. endoreg_db/views/label_video_segment/get_lvs_by_name_and_video.py +50 -0
  618. endoreg_db/views/label_video_segment/label_video_segment.py +77 -0
  619. endoreg_db/views/label_video_segment/label_video_segment_by_label.py +174 -0
  620. endoreg_db/views/label_video_segment/label_video_segment_detail.py +73 -0
  621. endoreg_db/views/label_video_segment/update_lvs_from_annotation.py +46 -0
  622. endoreg_db/views/label_video_segment/validate.py +226 -0
  623. endoreg_db/views/media/__init__.py +45 -0
  624. endoreg_db/views/media/pdf_media.py +388 -0
  625. endoreg_db/views/media/segments.py +71 -0
  626. endoreg_db/views/media/sensitive_metadata.py +314 -0
  627. endoreg_db/views/media/video_media.py +272 -0
  628. endoreg_db/views/media/video_segments.py +524 -0
  629. endoreg_db/views/meta/__init__.py +15 -0
  630. endoreg_db/views/meta/available_files_list.py +146 -0
  631. endoreg_db/views/meta/report_meta.py +53 -0
  632. endoreg_db/views/meta/sensitive_meta_detail.py +148 -0
  633. endoreg_db/views/meta/sensitive_meta_list.py +104 -0
  634. endoreg_db/views/meta/sensitive_meta_verification.py +71 -0
  635. endoreg_db/views/misc/__init__.py +63 -0
  636. endoreg_db/views/misc/center.py +13 -0
  637. endoreg_db/views/misc/csrf.py +7 -0
  638. endoreg_db/views/misc/gender.py +14 -0
  639. endoreg_db/views/misc/secure_file_serving_view.py +80 -0
  640. endoreg_db/views/misc/secure_file_url_view.py +84 -0
  641. endoreg_db/views/misc/secure_url_validate.py +79 -0
  642. endoreg_db/views/misc/stats.py +220 -0
  643. endoreg_db/views/misc/translation.py +182 -0
  644. endoreg_db/views/misc/upload_views.py +240 -0
  645. endoreg_db/views/patient/__init__.py +5 -0
  646. endoreg_db/views/patient/patient.py +210 -0
  647. endoreg_db/views/patient_examination/DEPRECATED_video_backup.py +164 -0
  648. endoreg_db/views/patient_examination/__init__.py +11 -0
  649. endoreg_db/views/patient_examination/patient_examination.py +140 -0
  650. endoreg_db/views/patient_examination/patient_examination_create.py +63 -0
  651. endoreg_db/views/patient_examination/patient_examination_detail.py +66 -0
  652. endoreg_db/views/patient_examination/patient_examination_list.py +68 -0
  653. endoreg_db/views/patient_examination/video.py +194 -0
  654. endoreg_db/views/patient_finding/__init__.py +7 -0
  655. endoreg_db/views/patient_finding/base.py +0 -0
  656. endoreg_db/views/patient_finding/patient_finding.py +64 -0
  657. endoreg_db/views/patient_finding/patient_finding_optimized.py +259 -0
  658. endoreg_db/views/patient_finding_classification/__init__.py +5 -0
  659. endoreg_db/views/patient_finding_classification/pfc_create.py +67 -0
  660. endoreg_db/views/patient_finding_location/__init__.py +5 -0
  661. endoreg_db/views/patient_finding_location/pfl_create.py +70 -0
  662. endoreg_db/views/patient_finding_morphology/__init__.py +5 -0
  663. endoreg_db/views/patient_finding_morphology/pfm_create.py +70 -0
  664. endoreg_db/views/pdf/__init__.py +8 -0
  665. endoreg_db/views/pdf/pdf_stream.py +187 -0
  666. endoreg_db/views/pdf/reimport.py +177 -0
  667. endoreg_db/views/report/__init__.py +9 -0
  668. endoreg_db/views/report/report_list.py +112 -0
  669. endoreg_db/views/report/report_with_secure_url.py +28 -0
  670. endoreg_db/views/report/start_examination.py +7 -0
  671. endoreg_db/views/requirement/__init__.py +10 -0
  672. endoreg_db/views/requirement/evaluate.py +279 -0
  673. endoreg_db/views/requirement/lookup.py +367 -0
  674. endoreg_db/views/requirement/lookup_store.py +252 -0
  675. endoreg_db/views/requirement_lookup/lookup.py +0 -0
  676. endoreg_db/views/requirement_lookup/lookup_store.py +0 -0
  677. endoreg_db/views/stats/__init__.py +13 -0
  678. endoreg_db/views/stats/stats_views.py +229 -0
  679. endoreg_db/views/video/__init__.py +59 -0
  680. endoreg_db/views/video/correction.py +530 -0
  681. endoreg_db/views/video/reimport.py +195 -0
  682. endoreg_db/views/video/segmentation.py +274 -0
  683. endoreg_db/views/video/task_status.py +49 -0
  684. endoreg_db/views/video/timeline.py +46 -0
  685. endoreg_db/views/video/video_analyze.py +52 -0
  686. endoreg_db/views/video/video_apply_mask.py +48 -0
  687. endoreg_db/views/video/video_correction.py +21 -0
  688. endoreg_db/views/video/video_download_processed.py +58 -0
  689. endoreg_db/views/video/video_examination_viewset.py +242 -0
  690. endoreg_db/views/video/video_meta.py +29 -0
  691. endoreg_db/views/video/video_processing_history.py +24 -0
  692. endoreg_db/views/video/video_remove_frames.py +48 -0
  693. endoreg_db/views/video/video_stream.py +306 -0
  694. endoreg_db/views.py +0 -3
  695. endoreg_db-0.8.6.3.dist-info/METADATA +383 -0
  696. endoreg_db-0.8.6.3.dist-info/RECORD +793 -0
  697. {endoreg_db-0.4.5.dist-info → endoreg_db-0.8.6.3.dist-info}/WHEEL +1 -1
  698. endoreg_db/data/active_model/data.yaml +0 -3
  699. endoreg_db/data/agl_service/data.yaml +0 -19
  700. endoreg_db/data/label/label-set/data.yaml +0 -18
  701. endoreg_db/management/commands/_load_model_template.py +0 -41
  702. endoreg_db/management/commands/delete_all.py +0 -18
  703. endoreg_db/management/commands/delete_legacy_images.py +0 -19
  704. endoreg_db/management/commands/delete_legacy_videos.py +0 -17
  705. endoreg_db/management/commands/extract_legacy_video_frames.py +0 -18
  706. endoreg_db/management/commands/fetch_legacy_image_dataset.py +0 -32
  707. endoreg_db/management/commands/fix_auth_permission.py +0 -20
  708. endoreg_db/management/commands/import_legacy_images.py +0 -94
  709. endoreg_db/management/commands/import_legacy_videos.py +0 -76
  710. endoreg_db/management/commands/load_active_model_data.py +0 -45
  711. endoreg_db/management/commands/load_endoscopy_processor_data.py +0 -45
  712. endoreg_db/management/commands/load_g_play_data.py +0 -113
  713. endoreg_db/management/commands/load_label_data.py +0 -67
  714. endoreg_db/management/commands/load_medication_indication_data.py +0 -63
  715. endoreg_db/management/commands/load_medication_indication_type_data.py +0 -41
  716. endoreg_db/management/commands/load_medication_schedule_data.py +0 -55
  717. endoreg_db/management/commands/load_network_data.py +0 -57
  718. endoreg_db/migrations/0002_anonymizedimagelabel_anonymousimageannotation_and_more.py +0 -55
  719. endoreg_db/migrations/0003_anonymousimageannotation_original_image_url_and_more.py +0 -39
  720. endoreg_db/migrations/0004_alter_rawpdffile_file.py +0 -20
  721. endoreg_db/migrations/0005_uploadedfile_alter_rawpdffile_file_anonymizedfile.py +0 -40
  722. endoreg_db/migrations/0006_alter_rawpdffile_file.py +0 -20
  723. endoreg_db/migrations/0007_networkdevicelogentry_datetime_and_more.py +0 -43
  724. endoreg_db/models/ai_model/__init__.py +0 -3
  725. endoreg_db/models/ai_model/active_model.py +0 -9
  726. endoreg_db/models/ai_model/model_meta.py +0 -24
  727. endoreg_db/models/annotation/__init__.py +0 -3
  728. endoreg_db/models/annotation/anonymized_image_annotation.py +0 -60
  729. endoreg_db/models/annotation/binary_classification_annotation_task.py +0 -80
  730. endoreg_db/models/annotation/image_classification.py +0 -27
  731. endoreg_db/models/center/center.py +0 -25
  732. endoreg_db/models/center/center_product.py +0 -34
  733. endoreg_db/models/center/center_resource.py +0 -19
  734. endoreg_db/models/center/center_waste.py +0 -11
  735. endoreg_db/models/data_file/__init__.py +0 -6
  736. endoreg_db/models/data_file/base_classes/__init__.py +0 -2
  737. endoreg_db/models/data_file/base_classes/abstract_frame.py +0 -51
  738. endoreg_db/models/data_file/base_classes/abstract_video.py +0 -201
  739. endoreg_db/models/data_file/frame.py +0 -45
  740. endoreg_db/models/data_file/import_classes/__init__.py +0 -32
  741. endoreg_db/models/data_file/import_classes/processing_functions/__init__.py +0 -35
  742. endoreg_db/models/data_file/import_classes/processing_functions/pdf.py +0 -28
  743. endoreg_db/models/data_file/import_classes/processing_functions/video.py +0 -260
  744. endoreg_db/models/data_file/import_classes/raw_pdf.py +0 -188
  745. endoreg_db/models/data_file/import_classes/raw_video.py +0 -343
  746. endoreg_db/models/data_file/metadata/__init__.py +0 -3
  747. endoreg_db/models/data_file/metadata/sensitive_meta.py +0 -31
  748. endoreg_db/models/data_file/metadata/video_meta.py +0 -133
  749. endoreg_db/models/data_file/report_file.py +0 -89
  750. endoreg_db/models/data_file/video/__init__.py +0 -7
  751. endoreg_db/models/data_file/video/import_meta.py +0 -25
  752. endoreg_db/models/data_file/video/video.py +0 -25
  753. endoreg_db/models/data_file/video_segment.py +0 -107
  754. endoreg_db/models/disease.py +0 -56
  755. endoreg_db/models/emission/__init__.py +0 -1
  756. endoreg_db/models/emission/emission_factor.py +0 -20
  757. endoreg_db/models/event.py +0 -22
  758. endoreg_db/models/examination/__init__.py +0 -4
  759. endoreg_db/models/examination/examination.py +0 -26
  760. endoreg_db/models/examination/examination_time.py +0 -27
  761. endoreg_db/models/examination/examination_time_type.py +0 -24
  762. endoreg_db/models/examination/examination_type.py +0 -18
  763. endoreg_db/models/hardware/__init__.py +0 -2
  764. endoreg_db/models/hardware/endoscope.py +0 -44
  765. endoreg_db/models/information_source.py +0 -29
  766. endoreg_db/models/laboratory/__init__.py +0 -1
  767. endoreg_db/models/laboratory/lab_value.py +0 -102
  768. endoreg_db/models/legacy_data/__init__.py +0 -3
  769. endoreg_db/models/legacy_data/image.py +0 -34
  770. endoreg_db/models/logging/__init__.py +0 -4
  771. endoreg_db/models/logging/agl_service.py +0 -19
  772. endoreg_db/models/logging/base.py +0 -22
  773. endoreg_db/models/logging/log_type.py +0 -23
  774. endoreg_db/models/logging/network_device.py +0 -24
  775. endoreg_db/models/medication/__init__.py +0 -1
  776. endoreg_db/models/medication/medication.py +0 -148
  777. endoreg_db/models/network/__init__.py +0 -3
  778. endoreg_db/models/network/agl_service.py +0 -38
  779. endoreg_db/models/network/network_device.py +0 -53
  780. endoreg_db/models/network/network_device_type.py +0 -23
  781. endoreg_db/models/other/distribution.py +0 -215
  782. endoreg_db/models/patient_examination/__init__.py +0 -35
  783. endoreg_db/models/permissions/__init__.py +0 -44
  784. endoreg_db/models/persons/__init__.py +0 -7
  785. endoreg_db/models/persons/examiner/__init__.py +0 -2
  786. endoreg_db/models/persons/examiner/examiner.py +0 -16
  787. endoreg_db/models/persons/examiner/examiner_type.py +0 -2
  788. endoreg_db/models/persons/patient/__init__.py +0 -8
  789. endoreg_db/models/persons/patient/case/case.py +0 -30
  790. endoreg_db/models/persons/patient/patient.py +0 -216
  791. endoreg_db/models/persons/patient/patient_disease.py +0 -16
  792. endoreg_db/models/persons/patient/patient_event.py +0 -22
  793. endoreg_db/models/persons/patient/patient_lab_sample.py +0 -106
  794. endoreg_db/models/persons/patient/patient_medication.py +0 -44
  795. endoreg_db/models/persons/patient/patient_medication_schedule.py +0 -28
  796. endoreg_db/models/persons/portal_user_information.py +0 -27
  797. endoreg_db/models/prediction/__init__.py +0 -2
  798. endoreg_db/models/prediction/image_classification.py +0 -37
  799. endoreg_db/models/prediction/video_prediction_meta.py +0 -244
  800. endoreg_db/models/product/__init__.py +0 -5
  801. endoreg_db/models/product/product.py +0 -97
  802. endoreg_db/models/product/product_group.py +0 -19
  803. endoreg_db/models/product/product_material.py +0 -24
  804. endoreg_db/models/questionnaires/__init__.py +0 -114
  805. endoreg_db/models/quiz/__init__.py +0 -2
  806. endoreg_db/models/quiz/quiz_answer.py +0 -41
  807. endoreg_db/models/quiz/quiz_question.py +0 -54
  808. endoreg_db/models/report_reader/__init__.py +0 -2
  809. endoreg_db/models/report_reader/report_reader_config.py +0 -53
  810. endoreg_db/models/rules/__init__.py +0 -5
  811. endoreg_db/queries/get/__init__.py +0 -6
  812. endoreg_db/queries/get/center.py +0 -42
  813. endoreg_db/queries/get/model.py +0 -13
  814. endoreg_db/queries/get/patient.py +0 -14
  815. endoreg_db/queries/get/patient_examination.py +0 -20
  816. endoreg_db/queries/get/report_file.py +0 -33
  817. endoreg_db/queries/get/video.py +0 -31
  818. endoreg_db/serializers/ai_model.py +0 -19
  819. endoreg_db/serializers/annotation.py +0 -17
  820. endoreg_db/serializers/center.py +0 -11
  821. endoreg_db/serializers/examination.py +0 -33
  822. endoreg_db/serializers/frame.py +0 -13
  823. endoreg_db/serializers/hardware.py +0 -21
  824. endoreg_db/serializers/label.py +0 -22
  825. endoreg_db/serializers/patient.py +0 -10
  826. endoreg_db/serializers/prediction.py +0 -15
  827. endoreg_db/serializers/report_file.py +0 -7
  828. endoreg_db/serializers/video.py +0 -27
  829. endoreg_db/tests.py +0 -3
  830. endoreg_db/utils/legacy_ocr.py +0 -201
  831. endoreg_db/utils/video_metadata.py +0 -87
  832. endoreg_db-0.4.5.dist-info/METADATA +0 -34
  833. endoreg_db-0.4.5.dist-info/RECORD +0 -316
  834. /endoreg_db/{data/distribution/numeric/.init → api/serializers/finding_descriptions.py} +0 -0
  835. /endoreg_db/{models/persons/patient/case/__init__.py → api/views/finding_descriptions.py} +0 -0
  836. /endoreg_db/{queries/get/annotation.py → config/__init__.py} +0 -0
  837. /endoreg_db/data/{label → ai_model_label}/label-type/data.yaml +0 -0
  838. /endoreg_db/data/{model_type → ai_model_type}/data.yaml +0 -0
  839. /endoreg_db/{queries/get/prediction.py → data/shift/m2.yaml} +0 -0
  840. /endoreg_db/{queries/get/video_import_meta.py → factories/__init__.py} +0 -0
  841. /endoreg_db/{queries/get/video_prediction_meta.py → helpers/__init__.py} +0 -0
  842. /endoreg_db/management/commands/{load_report_reader_flag.py → load_report_reader_flag_data.py} +0 -0
  843. /endoreg_db/models/{persons → administration/person}/person.py +0 -0
  844. /endoreg_db/models/{report_reader → media/pdf/report_reader}/report_reader_flag.py +0 -0
  845. /endoreg_db/models/{rules → rule}/rule_applicator.py +0 -0
  846. {endoreg_db-0.4.5.dist-info → endoreg_db-0.8.6.3.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,1382 @@
1
+ """
2
+ PDF import service module.
3
+
4
+ Provides high-level functions for importing and anonymizing PDF files,
5
+ combining RawPdfFile creation with text extraction and anonymization.
6
+ """
7
+
8
+ import errno
9
+ import hashlib
10
+ import logging
11
+ import os
12
+ import shutil
13
+ import sys
14
+ import time
15
+ from contextlib import contextmanager
16
+ from datetime import date, datetime
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING, Union
19
+
20
+ from django.db import transaction
21
+
22
+ from endoreg_db.models import SensitiveMeta
23
+ from endoreg_db.models.media.pdf.raw_pdf import RawPdfFile
24
+ from endoreg_db.models.state.raw_pdf import RawPdfState
25
+ from endoreg_db.utils import paths as path_utils
26
+
27
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Treat lock files older than this as stale and reclaim them (in seconds)
STALE_LOCK_SECONDS = 600

# NOTE: the former ``if TYPE_CHECKING: pass`` branch was a no-op and has been
# dropped; RawPdfFile is imported at runtime at the top of this module, so the
# string annotations that mention it need no typing-only import.
34
+
35
+
36
+ class PdfImportService:
37
+ """
38
+ Service class for importing and processing PDF files with text extraction and anonymization.
39
+ Uses a central PDF instance pattern for cleaner state management.
40
+
41
+ Supports two processing modes:
42
+ - 'blackening': Simple PDF masking with black rectangles over sensitive areas
43
+ - 'cropping': Advanced mode that crops sensitive regions to separate images
44
+ """
45
+
46
def __init__(
    self, allow_meta_overwrite: bool = False, processing_mode: str = "blackening"
):
    """
    Set up a fresh service instance with empty bookkeeping and a validated mode.

    Args:
        allow_meta_overwrite: Whether existing SensitiveMeta fields may be overwritten.
        processing_mode: 'blackening' for simple masking or 'cropping' for
            advanced cropping.

    Raises:
        ValueError: If processing_mode is not one of the supported modes.
    """
    # Bookkeeping that is independent of the requested mode.
    self.processed_files = set()
    # Cache for the optional ReportReader lookup; None means "not probed yet".
    self._report_reader_available = None
    self._report_reader_class = None
    self.allow_meta_overwrite = allow_meta_overwrite

    # Reject unknown modes before storing anything mode-related.
    supported_modes = ["blackening", "cropping"]
    if processing_mode not in supported_modes:
        message = (
            f"Invalid processing_mode '{processing_mode}'. Must be one of: {supported_modes}"
        )
        raise ValueError(message)
    self.processing_mode = processing_mode

    # Central PDF instance management: one "current" PDF plus its scratch context.
    self.current_pdf = None
    self.processing_context = {}
72
+
73
@classmethod
def with_blackening(cls, allow_meta_overwrite: bool = False) -> "PdfImportService":
    """
    Alternate constructor: build a service preset to the 'blackening' mode.

    Args:
        allow_meta_overwrite: Whether existing SensitiveMeta fields may be overwritten.

    Returns:
        A new instance of ``cls`` whose processing mode is 'blackening'.
    """
    options = {
        "allow_meta_overwrite": allow_meta_overwrite,
        "processing_mode": "blackening",
    }
    return cls(**options)
87
+
88
@classmethod
def with_cropping(cls, allow_meta_overwrite: bool = False) -> "PdfImportService":
    """
    Alternate constructor: build a service preset to the 'cropping' mode.

    Args:
        allow_meta_overwrite: Whether existing SensitiveMeta fields may be overwritten.

    Returns:
        A new instance of ``cls`` whose processing mode is 'cropping'.
    """
    mode = "cropping"
    service = cls(allow_meta_overwrite=allow_meta_overwrite, processing_mode=mode)
    return service
102
+
103
@contextmanager
def _file_lock(self, path: Path):
    """Hold an exclusive ``<path>.lock`` file for the duration of the ``with`` body.

    The lock is taken by creating the lock file with ``O_CREAT | O_EXCL``, so
    only one caller can create it. If the file already exists:

    * a lock older than STALE_LOCK_SECONDS (by mtime) is removed and the
      acquisition is retried once;
    * a lock that disappeared between the failed create and the ``stat`` call
      is retried once as well;
    * otherwise the file is considered busy.

    The lock file is removed on exit only if this call actually acquired it,
    so a caller that is turned away never deletes another worker's lock.

    Args:
        path: File to guard; the lock file is ``str(path) + ".lock"``.

    Raises:
        ValueError: If another worker holds a non-stale lock, or wins the
            race for the retry after a stale lock was reclaimed.
    """
    lock_path = Path(str(path) + ".lock")
    open_flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY

    try:
        # atomic create; fail if exists
        fd = os.open(lock_path, open_flags, 0o644)
    except FileExistsError:
        try:
            age = time.time() - os.stat(lock_path).st_mtime
        except FileNotFoundError:
            # race: the holder released the lock between our open() and stat();
            # fall through to the single retry below
            age = None

        if age is not None:
            if age <= STALE_LOCK_SECONDS:
                # Another worker is processing this file
                raise ValueError(f"File already being processed: {path}")
            logger.warning(
                "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
                path,
                age,
            )
            try:
                lock_path.unlink()
            except OSError as e:
                logger.warning("Failed to remove stale lock %s: %s", lock_path, e)

        try:
            # retry acquire
            fd = os.open(lock_path, open_flags, 0o644)
        except FileExistsError as exc:
            # Somebody else re-created the lock first; report it the same way
            # as an ordinary busy file.
            raise ValueError(f"File already being processed: {path}") from exc

    # From here on this call owns the lock file and is responsible for removing it.
    try:
        try:
            os.write(fd, b"lock")
        finally:
            os.close(fd)
        yield
    finally:
        try:
            lock_path.unlink()
        except OSError:
            # Best effort: the lock may already have been reclaimed as stale.
            pass
154
+
155
def _sha256(self, path: Path, chunk: int = 1024 * 1024) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read *chunk* bytes at a time."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for piece in iter(lambda: stream.read(chunk), b""):
            digest.update(piece)
    return digest.hexdigest()
165
+
166
def _get_pdf_dir(self) -> Path | None:
    """Return ``path_utils.PDF_DIR`` as a Path, or None when it is unset or unusable.

    A value that is already a Path is returned as-is. Any other value is first
    joined with "." via the ``/`` operator; if that fails (or yields None) its
    string form is used instead. Whatever results is passed to ``Path``; any
    conversion error yields None.
    """
    configured = getattr(path_utils, "PDF_DIR", None)
    if configured is None or isinstance(configured, Path):
        return configured

    # Path-like objects that support "/" are normalised through the join.
    try:
        joined = configured / "."
    except Exception:
        joined = None

    try:
        return Path(str(configured) if joined is None else joined)
    except Exception:
        return None
188
+
189
def _quarantine(self, source: Path) -> Path:
    """Relocate *source* into ``PDF_DIR/_processing`` so it is not picked up again.

    The ``_processing`` directory is created if missing.

    Args:
        source: File to move.

    Returns:
        The new location of the file (same file name, inside ``_processing``).

    Raises:
        OSError: If the rename fails for any reason other than a
            cross-device move.
    """
    holding_dir = path_utils.PDF_DIR / "_processing"
    holding_dir.mkdir(parents=True, exist_ok=True)
    destination = holding_dir / source.name

    try:
        # Atomic rename is the fast path when both paths share a filesystem.
        source.rename(destination)
    except OSError as move_error:
        if move_error.errno != errno.EXDEV:
            raise
        # Cross-device move: fall back to shutil.move, which copies then removes.
        shutil.move(str(source), str(destination))

    return destination
204
+
205
def _ensure_state(self, pdf_file: "RawPdfFile"):
    """Return the state object of *pdf_file*, creating it when possible.

    Args:
        pdf_file: Object expected to expose a ``state`` attribute and/or a
            ``get_or_create_state()`` method.

    Returns:
        The existing truthy ``pdf_file.state``; otherwise the result of
        ``pdf_file.get_or_create_state()`` (which is also stored on
        ``pdf_file.state``); or None when the object offers neither.
    """
    existing = getattr(pdf_file, "state", None)
    if existing:
        return existing

    if hasattr(pdf_file, "get_or_create_state"):
        state = pdf_file.get_or_create_state()
        pdf_file.state = state
        return state

    # The previous "defensive fallback" called get_or_create_state() on an
    # object just shown not to have it, so it could only end in a swallowed
    # AttributeError and None. Return None explicitly instead.
    # NOTE(review): if a real fallback is wanted (e.g. creating the state via
    # the state model directly), it has to be added here deliberately.
    return None
220
+
221
def _ensure_report_reading_available(self):
    """
    Locate the optional ``lx_anonymizer.ReportReader`` class and cache the outcome.

    A plain import is tried first. If that raises ImportError and the
    LX_ANONYMIZER_PATH environment variable names an existing directory that
    is not yet on ``sys.path``, the directory is prepended and the import is
    retried; the entry is removed again unless the retry succeeded.

    The result is stored on the instance, so later calls return the cached
    answer without importing again.

    Returns:
        Tuple of (availability_flag, ReportReader_class); the class is None
        when the module could not be loaded.
    """
    if self._report_reader_available is not None:
        return self._report_reader_available, self._report_reader_class

    reader_cls = None
    loaded = False  # explicit success flag (replaces the former locals() probe)

    try:
        # Try direct import first
        from lx_anonymizer import ReportReader as reader_cls

        loaded = True
        logger.info("Successfully imported lx_anonymizer ReportReader module")
    except ImportError:
        # Optional: honor LX_ANONYMIZER_PATH=/abs/path/to/src
        import importlib

        extra = os.getenv("LX_ANONYMIZER_PATH")
        if extra and extra not in sys.path and Path(extra).exists():
            sys.path.insert(0, extra)
            try:
                module = importlib.import_module("lx_anonymizer")
                reader_cls = getattr(module, "ReportReader")
                loaded = True
                logger.info(
                    "Imported lx_anonymizer.ReportReader via LX_ANONYMIZER_PATH"
                )
            except Exception as e:
                logger.warning(
                    "Failed importing lx_anonymizer via LX_ANONYMIZER_PATH: %s", e
                )
            finally:
                # Keep path for future imports if it worked; otherwise remove.
                if not loaded and extra in sys.path:
                    sys.path.remove(extra)

    self._report_reader_available = loaded
    self._report_reader_class = reader_cls if loaded else None
    return self._report_reader_available, self._report_reader_class
268
+
269
def _ensure_default_patient_data(self, pdf_instance: "RawPdfFile" = None) -> None:
    """
    Attach a placeholder SensitiveMeta to a PDF that has none.

    Works on *pdf_instance* when given, otherwise on ``self.current_pdf``.
    A PDF that already has a SensitiveMeta is left untouched. Failures while
    creating or saving the placeholder are logged, not raised.

    Args:
        pdf_instance: Optional specific PDF instance, defaults to self.current_pdf
    """
    pdf_file = pdf_instance or self.current_pdf
    if not pdf_file:
        logger.warning(
            "No PDF instance available for ensuring default patient data"
        )
        return

    # Nothing to do when metadata is already present.
    if pdf_file.sensitive_meta:
        return

    logger.info(
        f"No SensitiveMeta found for PDF {pdf_file.pdf_hash}, creating default"
    )

    # Fall back to a fixed center name when the PDF has no center assigned.
    if pdf_file.center:
        center_name = pdf_file.center.name
    else:
        center_name = "university_hospital_wuerzburg"

    # Placeholder values standing in for the missing patient data.
    placeholder = {
        "patient_first_name": "Patient",
        "patient_last_name": "Unknown",
        "patient_dob": date(1990, 1, 1),  # Default DOB
        "examination_date": date.today(),
        "center_name": center_name,
    }

    try:
        pdf_file.sensitive_meta = SensitiveMeta.create_from_dict(placeholder)
        pdf_file.save(update_fields=["sensitive_meta"])
        logger.info(
            f"Created default SensitiveMeta for PDF {pdf_file.pdf_hash}"
        )
    except Exception as e:
        logger.error(
            f"Failed to create default SensitiveMeta for PDF {pdf_file.pdf_hash}: {e}"
        )
312
+
313
def import_and_anonymize(
    self,
    file_path: Union[Path, str],
    center_name: str,
    delete_source: bool = False,
    retry: bool = False,
) -> "RawPdfFile":
    """
    Import a PDF and run the ReportReader-based anonymization pipeline.

    Steps, in order: initialize the processing context, validate the file,
    create or fetch the RawPdfFile, set up the processing environment,
    extract text/metadata, finalize. The processing context is always
    cleaned up afterwards, whether or not the run succeeded.

    The service's processing mode selects the anonymization strategy:
    - 'blackening': anonymized PDF with black rectangles over sensitive regions
    - 'cropping': sensitive regions are cropped into separate images

    Args:
        file_path: Path to the PDF file to import
        center_name: Name of the center to associate with the PDF
        delete_source: Whether to delete the source file after import
        retry: Whether this is a retry attempt

    Returns:
        The RawPdfFile held in self.current_pdf after processing. None is
        returned when no PDF instance was produced, or when the file is
        reported as already being processed.

    Raises:
        Exception: Any other failure is logged, error cleanup runs, and the
            exception is re-raised.
    """
    try:
        # Preparation: context, file validation, PDF record.
        self._initialize_processing_context(
            file_path, center_name, delete_source, retry
        )
        self._validate_and_prepare_file()
        self._create_or_retrieve_pdf_instance()

        if not self.current_pdf:
            logger.warning(
                f"No PDF instance created for {file_path}, returning None"
            )
            return None

        # Processing: environment setup, text/metadata extraction, finalization.
        pipeline = (
            self._setup_processing_environment,
            self._process_text_and_metadata,
            self._finalize_processing,
        )
        for step in pipeline:
            step()

        return self.current_pdf

    except Exception as exc:
        # A duplicate-processing ValueError is a skip, not a failure.
        if isinstance(exc, ValueError) and "already being processed" in str(exc):
            logger.info(f"Skipping file {file_path}: {exc}")
            return None

        logger.error(f"PDF import failed for {file_path}: {exc}")
        self._cleanup_on_error()
        raise
    finally:
        self._cleanup_processing_context()
387
+
388
+ def _initialize_processing_context(
389
+ self,
390
+ file_path: Union[Path, str],
391
+ center_name: str,
392
+ delete_source: bool,
393
+ retry: bool,
394
+ ):
395
+ """Initialize the processing context for the current PDF."""
396
+ self.processing_context = {
397
+ "file_path": Path(file_path),
398
+ "original_file_path": Path(file_path),
399
+ "center_name": center_name,
400
+ "delete_source": delete_source,
401
+ "retry": retry,
402
+ "file_hash": None,
403
+ "processing_started": False,
404
+ "text_extracted": False,
405
+ "metadata_processed": False,
406
+ "anonymization_completed": False,
407
+ }
408
+
409
+ # Check if already processed (only during current session to prevent race conditions)
410
+ if str(file_path) in self.processed_files:
411
+ logger.info(
412
+ f"File {file_path} already being processed in current session, skipping"
413
+ )
414
+ raise ValueError("File already being processed")
415
+
416
+ logger.info(f"Starting import and processing for: {file_path}")
417
+
418
+ def _validate_and_prepare_file(self):
419
+ """Validate file existence and calculate hash."""
420
+ file_path = self.processing_context["file_path"]
421
+
422
+ if not file_path.exists():
423
+ raise FileNotFoundError(f"PDF file not found: {file_path}")
424
+
425
+ try:
426
+ self.processing_context["file_hash"] = self._sha256(file_path)
427
+ except Exception as e:
428
+ logger.warning(f"Could not calculate file hash: {e}")
429
+ self.processing_context["file_hash"] = None
430
+
431
def _create_or_retrieve_pdf_instance(self):
    """
    Populate self.current_pdf with a new or already stored RawPdfFile.

    Without retry, a record with the same hash is looked up while holding
    the file lock: a record that already has text is reused as-is, a record
    without text is handed to _retry_existing_pdf(). If no record exists, a
    new one is created from the file. With retry, the record is fetched by
    hash and reused directly when it already has text.

    Returns:
        The result of _retry_existing_pdf() when an unprocessed duplicate is
        found, otherwise None.

    Raises:
        RuntimeError: If no RawPdfFile instance could be created.
        IntegrityError: If creation collides with an existing row and no
            file hash is available to fetch that row.
    """
    context = self.processing_context
    file_path = context["file_path"]
    center_name = context["center_name"]
    delete_source = context["delete_source"]
    retry = context["retry"]
    file_hash = context["file_hash"]

    if not retry:
        # Check for an existing PDF and handle duplicates
        with self._file_lock(file_path):
            # One query instead of exists() + get(): cheaper, and the row
            # cannot vanish between the check and the fetch.
            existing = (
                RawPdfFile.objects.filter(pdf_hash=file_hash).first()
                if file_hash
                else None
            )

            if existing:
                logger.info(f"Found existing RawPdfFile {existing.pdf_hash}")
                if existing.text:
                    logger.info(
                        f"Existing PDF {existing.pdf_hash} already processed - returning"
                    )
                    self.current_pdf = existing
                    return

                # Stored record has no text yet: process it again.
                logger.info(f"Reprocessing existing PDF {existing.pdf_hash}")
                return self._retry_existing_pdf(existing)

    # Create new PDF instance
    logger.info("Creating new RawPdfFile instance...")
    from django.db import IntegrityError

    try:
        if not retry:
            self.current_pdf = RawPdfFile.create_from_file_initialized(
                file_path=file_path,
                center_name=center_name,
                delete_source=delete_source,
            )
        else:
            # Retrieve existing record for retry
            self.current_pdf = RawPdfFile.objects.get(pdf_hash=file_hash)
            logger.info(
                f"Retrying import for existing RawPdfFile {self.current_pdf.pdf_hash}"
            )

            # Check if retry is actually needed
            if self.current_pdf.text:
                logger.info(
                    f"Existing PDF {self.current_pdf.pdf_hash} already processed during retry - returning"
                )
                return

        if not self.current_pdf:
            raise RuntimeError("Failed to create RawPdfFile instance")

        logger.info(f"PDF instance ready: {self.current_pdf.pdf_hash}")

    except IntegrityError:
        # Race condition - another worker created the row first
        if file_hash:
            self.current_pdf = RawPdfFile.objects.get(pdf_hash=file_hash)
            logger.info("Race condition detected, using existing RawPdfFile")
        else:
            raise
496
+
497
+ def _setup_processing_environment(self):
498
+ """Setup processing environment and state."""
499
+ original_path = self.processing_context.get("file_path")
500
+
501
+ # Create sensitive file copy
502
+ self.create_sensitive_file(self.current_pdf, original_path)
503
+
504
+ # Update file path to point to sensitive copy
505
+ self.processing_context["file_path"] = self.current_pdf.file.path
506
+ self.processing_context["sensitive_copy_created"] = True
507
+ try:
508
+ self.processing_context["sensitive_file_path"] = Path(
509
+ self.current_pdf.file.path
510
+ )
511
+ except Exception:
512
+ self.processing_context["sensitive_file_path"] = None
513
+
514
+ # Ensure state exists
515
+ state = self.current_pdf.get_or_create_state()
516
+ state.mark_processing_started()
517
+ self.processing_context["processing_started"] = True
518
+
519
+ # Mark as processed to prevent duplicates
520
+ self.processed_files.add(str(self.processing_context["file_path"]))
521
+
522
+ # Ensure default patient data
523
+ logger.info("Ensuring default patient data...")
524
+ self._ensure_default_patient_data(self.current_pdf)
525
+
526
+ def _process_text_and_metadata(self):
527
+ """Process text extraction and metadata using ReportReader."""
528
+ report_reading_available, ReportReader = self._ensure_report_reading_available()
529
+
530
+ if not report_reading_available:
531
+ logger.warning("Report reading not available (lx_anonymizer not found)")
532
+ self._mark_processing_incomplete("no_report_reader")
533
+ return
534
+
535
+ if not self.current_pdf.file:
536
+ logger.warning("No file available for text processing")
537
+ self._mark_processing_incomplete("no_file")
538
+ return
539
+
540
+ try:
541
+ logger.info(
542
+ f"Starting text extraction and metadata processing with ReportReader (mode: {self.processing_mode})..."
543
+ )
544
+
545
+ # Initialize ReportReader
546
+ report_reader = ReportReader(
547
+ report_root_path=str(path_utils.STORAGE_DIR),
548
+ locale="de_DE",
549
+ text_date_format="%d.%m.%Y",
550
+ )
551
+
552
+ if self.processing_mode == "cropping":
553
+ # Use advanced cropping method (existing implementation)
554
+ self._process_with_cropping(report_reader)
555
+ else: # blackening mode
556
+ # Use enhanced process_report with PDF masking
557
+ self._process_with_blackening(report_reader)
558
+
559
+ except Exception as e:
560
+ logger.warning(f"Text processing failed: {e}")
561
+ self._mark_processing_incomplete("text_processing_failed")
562
+
563
def _process_with_blackening(self, report_reader):
    """
    Process the PDF in blackening mode.

    Calls report_reader.process_report() with an output path under the
    "anonymized" directory, stores the four returned values in the
    processing context and applies each non-empty result.

    Args:
        report_reader: Reader instance exposing process_report().
    """
    logger.info("Using simple PDF blackening mode...")

    # Make sure the output directory exists before the reader writes to it
    output_dir = path_utils.PDF_DIR / "anonymized"
    output_dir.mkdir(parents=True, exist_ok=True)
    target_pdf = output_dir / f"{self.current_pdf.pdf_hash}_anonymized.pdf"

    # process_report returns a 4-tuple
    report = report_reader.process_report(
        pdf_path=self.processing_context["file_path"],
        create_anonymized_pdf=True,
        anonymized_pdf_output_path=str(target_pdf),
    )
    original_text, anonymized_text, extracted_metadata, anonymized_pdf_path = report

    self.processing_context.update(
        {
            "original_text": original_text,
            "anonymized_text": anonymized_text,
            "extracted_metadata": extracted_metadata,
            "cropped_regions": None,  # Not available in blackening mode
            "anonymized_pdf_path": anonymized_pdf_path,
        }
    )

    # Apply every available result and flag the matching stage as done
    follow_ups = (
        (original_text, self._apply_text_results, "text_extracted"),
        (extracted_metadata, self._apply_metadata_results, "metadata_processed"),
        (anonymized_pdf_path, self._apply_anonymized_pdf, "anonymization_completed"),
    )
    for result, apply_result, done_flag in follow_ups:
        if result:
            apply_result()
            self.processing_context[done_flag] = True

    logger.info("PDF blackening processing completed")
609
+
610
def _process_with_cropping(self, report_reader):
    """
    Process the PDF in cropping mode.

    Calls report_reader.process_report_with_cropping() with output
    directories for crops and anonymized PDFs, stores the five returned
    values in the processing context and applies each non-empty result.

    Args:
        report_reader: Reader instance exposing process_report_with_cropping().
    """
    logger.info("Using advanced cropping mode...")

    # Both output directories must exist before the reader writes to them
    crops_dir = path_utils.PDF_DIR / "cropped_regions"
    anonymized_dir = path_utils.PDF_DIR / "anonymized"
    for output_dir in (crops_dir, anonymized_dir):
        output_dir.mkdir(parents=True, exist_ok=True)

    # process_report_with_cropping returns a 5-tuple
    report = report_reader.process_report_with_cropping(
        pdf_path=self.processing_context["file_path"],
        crop_sensitive_regions=True,
        crop_output_dir=str(crops_dir),
        anonymization_output_dir=str(anonymized_dir),
    )
    (
        original_text,
        anonymized_text,
        extracted_metadata,
        cropped_regions,
        anonymized_pdf_path,
    ) = report

    self.processing_context.update(
        {
            "original_text": original_text,
            "anonymized_text": anonymized_text,
            "extracted_metadata": extracted_metadata,
            "cropped_regions": cropped_regions,
            "anonymized_pdf_path": anonymized_pdf_path,
        }
    )

    # Apply every available result and flag the matching stage as done
    follow_ups = (
        (original_text, self._apply_text_results, "text_extracted"),
        (extracted_metadata, self._apply_metadata_results, "metadata_processed"),
        (anonymized_pdf_path, self._apply_anonymized_pdf, "anonymization_completed"),
    )
    for result, apply_result, done_flag in follow_ups:
        if result:
            apply_result()
            self.processing_context[done_flag] = True

    logger.info("PDF cropping processing completed")
659
+
660
+ def _apply_text_results(self):
661
+ """Apply text extraction results to the PDF instance."""
662
+ if not self.current_pdf:
663
+ logger.warning("Cannot apply text results - no PDF instance available")
664
+ return
665
+
666
+ original_text = self.processing_context.get("original_text")
667
+ anonymized_text = self.processing_context.get("anonymized_text")
668
+
669
+ if not original_text:
670
+ logger.warning("No original text available to apply")
671
+ return
672
+
673
+ # Store extracted text
674
+ self.current_pdf.text = original_text
675
+ logger.info(f"Extracted {len(original_text)} characters of text from PDF")
676
+
677
+ # Handle anonymized text
678
+ if anonymized_text and anonymized_text != original_text:
679
+ self.current_pdf.anonymized = True
680
+ logger.info("PDF text anonymization completed")
681
+
682
+ def _apply_metadata_results(self):
683
+ """Apply metadata extraction results to SensitiveMeta."""
684
+ if not self.current_pdf:
685
+ logger.warning("Cannot apply metadata results - no PDF instance available")
686
+ return
687
+
688
+ extracted_metadata = self.processing_context.get("extracted_metadata")
689
+
690
+ if not self.current_pdf.sensitive_meta or not extracted_metadata:
691
+ logger.debug("No sensitive meta or extracted metadata available")
692
+ return
693
+
694
+ sm = self.current_pdf.sensitive_meta
695
+
696
+ # Map ReportReader metadata to SensitiveMeta fields
697
+ metadata_mapping = {
698
+ "patient_first_name": "patient_first_name",
699
+ "patient_last_name": "patient_last_name",
700
+ "patient_dob": "patient_dob",
701
+ "examination_date": "examination_date",
702
+ "examiner_first_name": "examiner_first_name",
703
+ "examiner_last_name": "examiner_last_name",
704
+ "endoscope_type": "endoscope_type",
705
+ "casenumber": "case_number",
706
+ }
707
+
708
+ # Update fields with extracted information
709
+ updated_fields = []
710
+ for meta_key, sm_field in metadata_mapping.items():
711
+ if extracted_metadata.get(meta_key) and hasattr(sm, sm_field):
712
+ old_value = getattr(sm, sm_field)
713
+ raw_value = extracted_metadata[meta_key]
714
+
715
+ # Skip if we just got the field name as a string (indicates no actual data)
716
+ if isinstance(raw_value, str) and raw_value == meta_key:
717
+ continue
718
+
719
+ # Handle date fields specially
720
+ if sm_field in ["patient_dob", "examination_date"]:
721
+ new_value = self._parse_date_field(raw_value, meta_key, sm_field)
722
+ if new_value is None:
723
+ continue
724
+ else:
725
+ new_value = raw_value
726
+
727
+ # Configurable overwrite policy
728
+ should_overwrite = (
729
+ self.allow_meta_overwrite
730
+ or not old_value
731
+ or old_value in ["Patient", "Unknown"]
732
+ )
733
+ if new_value and should_overwrite:
734
+ setattr(sm, sm_field, new_value)
735
+ updated_fields.append(sm_field)
736
+
737
+ if updated_fields:
738
+ sm.save()
739
+ logger.info(f"Updated SensitiveMeta fields: {updated_fields}")
740
+
741
+ def _parse_date_field(self, raw_value, meta_key, sm_field):
742
+ """Parse date field with error handling."""
743
+ try:
744
+ if isinstance(raw_value, str):
745
+ # Skip if the value is just the field name itself
746
+ if raw_value == meta_key:
747
+ logger.warning(
748
+ "Skipping date field %s - got field name '%s' instead of actual date",
749
+ sm_field,
750
+ raw_value,
751
+ )
752
+ return None
753
+
754
+ # Try common date formats
755
+ date_formats = ["%Y-%m-%d", "%d.%m.%Y", "%d/%m/%Y", "%m/%d/%Y"]
756
+ for fmt in date_formats:
757
+ try:
758
+ return datetime.strptime(raw_value, fmt).date()
759
+ except ValueError:
760
+ continue
761
+
762
+ logger.warning(
763
+ "Could not parse date '%s' for field %s", raw_value, sm_field
764
+ )
765
+ return None
766
+
767
+ elif hasattr(raw_value, "date"):
768
+ return raw_value.date()
769
+ else:
770
+ return raw_value
771
+
772
+ except (ValueError, AttributeError) as e:
773
+ logger.warning("Date parsing failed for %s: %s", sm_field, e)
774
+ return None
775
+
776
+ # from gc-08
777
+ def _apply_anonymized_pdf(self):
778
+ """
779
+ Attach the already-generated anonymized PDF without copying bytes.
780
+
781
+ We do NOT re-upload or re-save file bytes via Django storage (which would
782
+ place a new file under upload_to='raw_pdfs' and retrigger the watcher).
783
+ Instead, we point the FileField to the path that the anonymizer already
784
+ wrote (ideally relative to STORAGE_DIR). Additionally, we make sure the
785
+ model/state reflect that anonymization is done even if text didn't change.
786
+ """
787
+ if not self.current_pdf:
788
+ logger.warning("Cannot apply anonymized PDF - no PDF instance available")
789
+ return
790
+
791
+ anonymized_pdf_path = self.processing_context.get("anonymized_pdf_path")
792
+ if not anonymized_pdf_path:
793
+ logger.debug("No anonymized_pdf_path present in processing context")
794
+ return
795
+
796
+ anonymized_path = Path(anonymized_pdf_path)
797
+ if not anonymized_path.exists():
798
+ logger.warning(
799
+ "Anonymized PDF path returned but file does not exist: %s",
800
+ anonymized_path,
801
+ )
802
+ return
803
+
804
+ logger.info("Anonymized PDF created by ReportReader at: %s", anonymized_path)
805
+
806
+ try:
807
+ # Prefer storing a path relative to STORAGE_DIR so Django serves it correctly
808
+ try:
809
+ relative_name = str(anonymized_path.relative_to(path_utils.STORAGE_DIR))
810
+ except ValueError:
811
+ # Fallback to absolute path if the file lives outside STORAGE_DIR
812
+ relative_name = str(anonymized_path)
813
+
814
+ # Only update if something actually changed
815
+ if getattr(self.current_pdf.anonymized_file, "name", None) != relative_name:
816
+ self.current_pdf.anonymized_file.name = relative_name
817
+
818
+ # Ensure model/state reflect anonymization even if text didn't differ
819
+ if not getattr(self.current_pdf, "anonymized", False):
820
+ self.current_pdf.anonymized = True
821
+
822
+ # Persist cropped regions info somewhere useful (optional & non-breaking)
823
+ # If your model has a field for this, persist there; otherwise we just log.
824
+ cropped_regions = self.processing_context.get("cropped_regions")
825
+ if cropped_regions:
826
+ logger.debug(
827
+ "Cropped regions recorded (%d regions).", len(cropped_regions)
828
+ )
829
+
830
+ # Save model changes
831
+ update_fields = ["anonymized_file"]
832
+ if "anonymized" in self.current_pdf.__dict__:
833
+ update_fields.append("anonymized")
834
+ self.current_pdf.save(update_fields=update_fields)
835
+
836
+ # Mark state as anonymized immediately; this keeps downstream flows working
837
+ state = self._ensure_state(self.current_pdf)
838
+ if state and not state.anonymized:
839
+ state.mark_anonymized(save=True)
840
+
841
+ logger.info(
842
+ "Updated anonymized_file reference to: %s",
843
+ self.current_pdf.anonymized_file.name,
844
+ )
845
+
846
+ except Exception as e:
847
+ logger.warning("Could not set anonymized file reference: %s", e)
848
+
849
+ def _finalize_processing(self):
850
+ """Finalize processing and update state."""
851
+ if not self.current_pdf:
852
+ logger.warning("Cannot finalize processing - no PDF instance available")
853
+ return
854
+
855
+ try:
856
+ # Update state based on processing results
857
+ state = self._ensure_state(self.current_pdf)
858
+
859
+ if self.processing_context.get("text_extracted") and state:
860
+ state.mark_anonymized()
861
+
862
+ # Mark as ready for validation after successful anonymization
863
+ if self.processing_context.get("anonymization_completed") and state:
864
+ state.mark_sensitive_meta_processed()
865
+ logger.info(
866
+ f"PDF {self.current_pdf.pdf_hash} processing completed - "
867
+ f"ready for validation (status: {state.anonymization_status})"
868
+ )
869
+
870
+ # Save all changes
871
+ with transaction.atomic():
872
+ self.current_pdf.save()
873
+ if state:
874
+ state.save()
875
+
876
+ logger.info("PDF processing completed successfully")
877
+ except Exception as e:
878
+ logger.warning(f"Failed to finalize processing: {e}")
879
+
880
+ def _mark_processing_incomplete(self, reason: str):
881
+ """Mark processing as incomplete with reason."""
882
+ if not self.current_pdf:
883
+ logger.warning(
884
+ f"Cannot mark processing incomplete - no PDF instance available. Reason: {reason}"
885
+ )
886
+ return
887
+
888
+ try:
889
+ state = self._ensure_state(self.current_pdf)
890
+ if state:
891
+ state.text_meta_extracted = False
892
+ state.pdf_meta_extracted = False
893
+ state.sensitive_meta_processed = False
894
+ state.save()
895
+ logger.info(f"Set PDF state: processed=False due to {reason}")
896
+
897
+ # Save changes
898
+ with transaction.atomic():
899
+ self.current_pdf.save()
900
+ except Exception as e:
901
+ logger.warning(f"Failed to mark processing incomplete: {e}")
902
+
903
+ def _retry_existing_pdf(self, existing_pdf):
904
+ """
905
+ Retry processing for existing PDF.
906
+
907
+ Uses get_raw_file_path() to find the original raw file instead of
908
+ relying on the file field which may point to a deleted sensitive file.
909
+ """
910
+ try:
911
+ # ✅ FIX: Use get_raw_file_path() to find original file
912
+ raw_file_path = existing_pdf.get_raw_file_path()
913
+
914
+ if not raw_file_path or not raw_file_path.exists():
915
+ logger.error(
916
+ f"Cannot retry PDF {existing_pdf.pdf_hash}: Raw file not found. "
917
+ f"Please re-upload the original PDF file."
918
+ )
919
+ self.current_pdf = existing_pdf
920
+ return existing_pdf
921
+
922
+ logger.info(f"Found raw file for retry at: {raw_file_path}")
923
+
924
+ # Remove from processed files to allow retry
925
+ file_path_str = str(raw_file_path)
926
+ if file_path_str in self.processed_files:
927
+ self.processed_files.remove(file_path_str)
928
+ logger.debug(f"Removed {file_path_str} from processed files for retry")
929
+
930
+ return self.import_and_anonymize(
931
+ file_path=raw_file_path, # ✅ Use raw file path, not sensitive path
932
+ center_name=existing_pdf.center.name
933
+ if existing_pdf.center
934
+ else "unknown_center",
935
+ delete_source=False, # Never delete during retry
936
+ retry=True,
937
+ )
938
+ except Exception as e:
939
+ logger.error(
940
+ f"Failed to re-import existing PDF {existing_pdf.pdf_hash}: {e}"
941
+ )
942
+ self.current_pdf = existing_pdf
943
+ return existing_pdf
944
+
945
+ def _cleanup_on_error(self):
946
+ """Cleanup processing context on error."""
947
+ try:
948
+ if self.current_pdf and hasattr(self.current_pdf, "state"):
949
+ state = self._ensure_state(self.current_pdf)
950
+ if state and self.processing_context.get("processing_started"):
951
+ state.text_meta_extracted = False
952
+ state.pdf_meta_extracted = False
953
+ state.sensitive_meta_processed = False
954
+ state.save()
955
+ logger.debug("Updated PDF state to indicate processing failure")
956
+ except Exception as e:
957
+ logger.warning(f"Error during cleanup: {e}")
958
+ finally:
959
+ # Remove any sensitive copy created during this processing run
960
+ sensitive_created = self.processing_context.get("sensitive_copy_created")
961
+ if sensitive_created:
962
+ pdf_obj = self.current_pdf
963
+ try:
964
+ if pdf_obj:
965
+ file_field = getattr(pdf_obj, "file", None)
966
+ if file_field and getattr(file_field, "name", None):
967
+ storage_name = file_field.name
968
+ file_field.delete(save=False)
969
+ logger.debug(
970
+ "Deleted sensitive copy %s during error cleanup",
971
+ storage_name,
972
+ )
973
+ except Exception as cleanup_exc:
974
+ logger.warning(
975
+ "Failed to remove sensitive copy during error cleanup: %s",
976
+ cleanup_exc,
977
+ )
978
+
979
+ # Always clean up processed files set to prevent blocks
980
+ file_path = self.processing_context.get("file_path")
981
+ if file_path and str(file_path) in self.processed_files:
982
+ self.processed_files.remove(str(file_path))
983
+ logger.debug(
984
+ f"Removed {file_path} from processed files during error cleanup"
985
+ )
986
+
987
+ try:
988
+ original_path = self.processing_context.get("original_file_path")
989
+ logger.debug(
990
+ "PDF cleanup original path: %s (%s)",
991
+ original_path,
992
+ type(original_path),
993
+ )
994
+ raw_dir = (
995
+ original_path.parent if isinstance(original_path, Path) else None
996
+ )
997
+ if (
998
+ isinstance(original_path, Path)
999
+ and original_path.exists()
1000
+ and not self.processing_context.get("sensitive_copy_created")
1001
+ ):
1002
+ try:
1003
+ original_path.unlink()
1004
+ logger.info(
1005
+ "Removed original file %s during error cleanup",
1006
+ original_path,
1007
+ )
1008
+ except Exception as remove_exc:
1009
+ logger.warning(
1010
+ "Could not remove original file %s during error cleanup: %s",
1011
+ original_path,
1012
+ remove_exc,
1013
+ )
1014
+ pdf_dir = self._get_pdf_dir()
1015
+ if not pdf_dir and raw_dir:
1016
+ base_dir = raw_dir.parent
1017
+ dir_name = getattr(path_utils, "PDF_DIR_NAME", "pdfs")
1018
+ fallback_pdf_dir = base_dir / dir_name
1019
+ logger.debug(
1020
+ "PDF cleanup fallback resolution - base: %s, dir_name: %s, exists: %s",
1021
+ base_dir,
1022
+ dir_name,
1023
+ fallback_pdf_dir.exists(),
1024
+ )
1025
+ if fallback_pdf_dir.exists():
1026
+ pdf_dir = fallback_pdf_dir
1027
+
1028
+ # Remove empty PDF subdirectories that might have been created during setup
1029
+ if pdf_dir and pdf_dir.exists():
1030
+ for subdir_name in (
1031
+ "sensitive",
1032
+ "cropped_regions",
1033
+ "anonymized",
1034
+ "_processing",
1035
+ ):
1036
+ subdir_path = pdf_dir / subdir_name
1037
+ if subdir_path.exists() and subdir_path.is_dir():
1038
+ try:
1039
+ next(subdir_path.iterdir())
1040
+ except StopIteration:
1041
+ try:
1042
+ subdir_path.rmdir()
1043
+ logger.debug(
1044
+ "Removed empty directory %s during error cleanup",
1045
+ subdir_path,
1046
+ )
1047
+ except OSError as rm_err:
1048
+ logger.debug(
1049
+ "Could not remove directory %s: %s",
1050
+ subdir_path,
1051
+ rm_err,
1052
+ )
1053
+ except Exception as iter_err:
1054
+ logger.debug(
1055
+ "Could not inspect directory %s: %s",
1056
+ subdir_path,
1057
+ iter_err,
1058
+ )
1059
+
1060
+ raw_count = (
1061
+ len(list(raw_dir.glob("*")))
1062
+ if raw_dir and raw_dir.exists()
1063
+ else None
1064
+ )
1065
+ pdf_count = (
1066
+ len(list(pdf_dir.glob("*")))
1067
+ if pdf_dir and pdf_dir.exists()
1068
+ else None
1069
+ )
1070
+
1071
+ sensitive_path = self.processing_context.get("sensitive_file_path")
1072
+ if sensitive_path:
1073
+ sensitive_parent = Path(sensitive_path).parent
1074
+ sensitive_count = (
1075
+ len(list(sensitive_parent.glob("*")))
1076
+ if sensitive_parent.exists()
1077
+ else None
1078
+ )
1079
+ else:
1080
+ sensitive_dir = pdf_dir / "sensitive" if pdf_dir else None
1081
+ sensitive_count = (
1082
+ len(list(sensitive_dir.glob("*")))
1083
+ if sensitive_dir and sensitive_dir.exists()
1084
+ else None
1085
+ )
1086
+
1087
+ logger.info(
1088
+ "PDF import error cleanup counts - raw: %s, pdf: %s, sensitive: %s",
1089
+ raw_count,
1090
+ pdf_count,
1091
+ sensitive_count,
1092
+ )
1093
+ except Exception:
1094
+ pass
1095
+
1096
+ def _cleanup_processing_context(self):
1097
+ """Cleanup processing context."""
1098
+ try:
1099
+ # Clean up temporary directories
1100
+ if self.processing_context.get("text_extracted"):
1101
+ crops_dir = path_utils.PDF_DIR / "cropped_regions"
1102
+ if crops_dir.exists() and not any(crops_dir.iterdir()):
1103
+ crops_dir.rmdir()
1104
+
1105
+ # Always remove from processed files set after processing attempt
1106
+ file_path = self.processing_context.get("file_path")
1107
+ if file_path and str(file_path) in self.processed_files:
1108
+ self.processed_files.remove(str(file_path))
1109
+ logger.debug(f"Removed {file_path} from processed files set")
1110
+
1111
+ except Exception as e:
1112
+ logger.warning(f"Error during context cleanup: {e}")
1113
+ finally:
1114
+ # Reset context
1115
+ self.current_pdf = None
1116
+ self.processing_context = {}
1117
+
1118
def import_simple(
    self, file_path: Union[Path, str], center_name: str, delete_source: bool = False
) -> "RawPdfFile":
    """
    Import a PDF without text processing or anonymization.

    Creates the RawPdfFile, resets its state flags to "not processed" and
    saves it. The processing context is always cleaned up afterwards.

    Args:
        file_path: Path to the PDF file to import
        center_name: Name of the center to associate with the PDF
        delete_source: Whether to delete the source file after import

    Returns:
        RawPdfFile instance after basic import

    Raises:
        RuntimeError: If no RawPdfFile instance could be created.
        Exception: Any other failure is logged, error cleanup runs, and the
            exception is re-raised.
    """
    try:
        # A simple import is never a retry
        self._initialize_processing_context(
            file_path, center_name, delete_source, False
        )
        self._validate_and_prepare_file()

        logger.info("Starting simple import - creating RawPdfFile instance...")
        pdf = RawPdfFile.create_from_file_initialized(
            file_path=self.processing_context["file_path"],
            center_name=center_name,
            delete_source=delete_source,
        )
        self.current_pdf = pdf

        if not pdf:
            raise RuntimeError("Failed to create RawPdfFile instance")

        # Register the path as handled for this session
        self.processed_files.add(str(self.processing_context["file_path"]))

        # A simple import leaves every processing flag unset
        pdf_state = self._ensure_state(pdf)
        if pdf_state:
            for flag in (
                "text_meta_extracted",
                "pdf_meta_extracted",
                "sensitive_meta_processed",
            ):
                setattr(pdf_state, flag, False)
            pdf_state.save()
            logger.info("Set PDF state: processed=False for simple import")

        with transaction.atomic():
            pdf.save()

        logger.info(
            "Simple import completed for RawPdfFile hash: %s",
            pdf.pdf_hash,
        )
        return pdf

    except Exception as e:
        logger.error(f"Simple PDF import failed for {file_path}: {e}")
        self._cleanup_on_error()
        raise
    finally:
        self._cleanup_processing_context()
1181
+
1182
def check_storage_capacity(
    self, file_path: Union[Path, str], storage_root, min_required_space
) -> bool:
    """
    Check that the storage location has room for the PDF file.

    The required space is the larger of the file size and
    min_required_space. Previously min_required_space was overwritten and
    never taken into account.

    Args:
        file_path: Path to the PDF file to check
        storage_root: Directory whose filesystem is inspected for free space
        min_required_space: Minimum free space to require; treated as a
            byte count (NOTE(review): confirm the unit against callers).
            Non-numeric or non-positive values are ignored.

    Returns:
        True when enough space is available.

    Raises:
        FileNotFoundError: If file_path does not exist.
        InsufficientStorageError: If there is not enough free space.
    """
    import shutil

    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"File not found for storage check: {file_path}")

    file_size = file_path.stat().st_size
    free = shutil.disk_usage(storage_root).free

    # Only a positive real number counts as a caller-supplied minimum;
    # bool is excluded because it is a subclass of int.
    minimum = 0
    if (
        isinstance(min_required_space, (int, float))
        and not isinstance(min_required_space, bool)
        and min_required_space > 0
    ):
        minimum = min_required_space
    required_space = max(file_size, minimum)

    if required_space > free:
        from endoreg_db.exceptions import InsufficientStorageError

        raise InsufficientStorageError(
            f"Not enough space to store PDF file: {file_path}"
        )
    logger.info(
        f"Storage check passed for {file_path}: {file_size} bytes, {free} bytes available"
    )

    return True
1222
+
1223
def create_sensitive_file(
    self, pdf_instance: "RawPdfFile" = None, file_path: Union[Path, str] = None
) -> None:
    """
    Move the PDF into the sensitive directory and update the file reference.

    The file is moved to ``PDF_DIR/sensitive/<pdf_hash>.pdf`` and the model's
    FileField name is set to that location (relative to STORAGE_DIR when
    possible). Any leftover copy at the source path is removed afterwards.
    Uses the central PDF instance and processing context if parameters are
    not provided.

    Failures while moving or updating are logged as warnings and not
    re-raised (best-effort); only missing inputs raise.

    Args:
        pdf_instance: Optional PDF instance, defaults to self.current_pdf.
        file_path: Optional source path, defaults to
            processing_context['file_path'].

    Raises:
        ValueError: If no PDF instance or no source path is available.
    """
    pdf_file = pdf_instance or self.current_pdf
    source_path = file_path if file_path else self.processing_context.get("file_path")

    if not pdf_file:
        raise ValueError("No PDF instance available for creating sensitive file")
    if not source_path:
        raise ValueError("No file path available for creating sensitive file")

    # The processing context may hold a plain string; the code below needs
    # Path methods (resolve/exists), so normalise once here.
    source_path = Path(source_path)

    sensitive_dir = path_utils.PDF_DIR / "sensitive"
    target = sensitive_dir / f"{pdf_file.pdf_hash}.pdf"

    try:
        os.makedirs(sensitive_dir, exist_ok=True)

        # If source already is the target, just ensure FileField points correctly
        same_file = source_path.resolve() == target.resolve()
        if not same_file:
            # Move the file from ingress to sensitive storage,
            # replacing an existing target (re-import).
            if target.exists():
                try:
                    target.unlink()
                except OSError as e:
                    logger.warning(
                        "Could not remove existing sensitive target %s: %s",
                        target,
                        e,
                    )
            shutil.move(str(source_path), str(target))
            logger.info("Moved PDF to sensitive directory: %s", target)

        # Update FileField to reference the file under STORAGE_DIR.
        # The file content is already at target, so only .name is set.
        try:
            relative_name = str(target.relative_to(path_utils.STORAGE_DIR))
        except ValueError:
            # Fallback: target is not under STORAGE_DIR, store absolute path (not ideal)
            relative_name = str(target)

        # Only update when changed
        if getattr(pdf_file.file, "name", None) != relative_name:
            pdf_file.file.name = relative_name
            pdf_file.save(update_fields=["file"])
            logger.info(
                "Updated PDF FileField reference to sensitive path: %s",
                pdf_file.file.path,
            )
        else:
            logger.debug(
                "PDF FileField already points to sensitive path: %s",
                pdf_file.file.path,
            )

        # Best-effort: remove a leftover source to avoid re-triggers.
        # When the source resolves to the target, deleting it would delete
        # the stored file itself, unless the source is merely a symlink
        # (os.remove then drops only the link).
        source_is_stored_file = same_file and not source_path.is_symlink()
        try:
            if source_path.exists() and not source_is_stored_file:
                os.remove(source_path)
                logger.info("Removed original PDF file at ingress: %s", source_path)
        except OSError as e:
            logger.warning("Could not delete original PDF file %s: %s", source_path, e)

    except Exception as e:
        # Deliberately best-effort: report the problem but do not abort the import.
        logger.warning(
            f"Could not create sensitive file copy for {pdf_file.pdf_hash}: {e}",
            exc_info=True,
        )
1304
+
1305
def archive_or_quarantine_file(
    self,
    pdf_instance: "RawPdfFile" = None,
    source_file_path: Union[Path, str] = None,
    quarantine_reason: str = None,
    is_pdf_problematic: bool = None,
) -> bool:
    """
    Move the processed PDF either to the archive or to quarantine.

    Missing arguments fall back to the central PDF instance and the
    processing context.

    Args:
        pdf_instance: Optional PDF instance, defaults to self.current_pdf
        source_file_path: Optional source file path, defaults to processing_context['file_path']
        quarantine_reason: Optional quarantine reason, defaults to processing_context['error_reason']
        is_pdf_problematic: Optional override for problematic state

    Returns:
        bool: True when the PDF was treated as problematic (even if the
        move to quarantine failed), False on the archive path (even if the
        move to the archive failed).

    Raises:
        ValueError: If no PDF instance or no file path is available.
    """
    pdf_file = pdf_instance or self.current_pdf
    if source_file_path:
        file_path = Path(source_file_path)
    else:
        file_path = self.processing_context.get("file_path")
    quarantine_reason = quarantine_reason or self.processing_context.get(
        "error_reason"
    )

    if not pdf_file:
        raise ValueError("No PDF instance available for archiving/quarantine")
    if not file_path:
        raise ValueError("No file path available for archiving/quarantine")

    # An explicit override wins; otherwise ask the model instance.
    if is_pdf_problematic is None:
        treat_as_problematic = pdf_file.is_problematic
    else:
        treat_as_problematic = is_pdf_problematic

    if not treat_as_problematic:
        # Regular case: file goes to the "processed" archive.
        logger.info(f"Archiving successfully processed PDF: {pdf_file.pdf_hash}")
        processed_dir = path_utils.PDF_DIR / "processed"
        os.makedirs(processed_dir, exist_ok=True)
        archive_path = processed_dir / f"{pdf_file.pdf_hash}.pdf"
        try:
            shutil.move(file_path, archive_path)
            logger.info(f"Moved processed PDF to archive: {archive_path}")
        except Exception as e:
            logger.error(f"Failed to archive PDF {pdf_file.pdf_hash}: {e}")
        return False

    # Problematic case: file goes to quarantine and the reason is stored.
    logger.warning(
        f"Quarantining problematic PDF: {pdf_file.pdf_hash}, reason: {quarantine_reason}"
    )
    quarantine_dir = path_utils.PDF_DIR / "quarantine"
    os.makedirs(quarantine_dir, exist_ok=True)
    quarantine_path = quarantine_dir / f"{pdf_file.pdf_hash}.pdf"
    try:
        shutil.move(file_path, quarantine_path)
        pdf_file.quarantine_reason = quarantine_reason or "File processing failed"
        pdf_file.save(update_fields=["quarantine_reason"])
        logger.info(f"Moved problematic PDF to quarantine: {quarantine_path}")
    except Exception as e:
        logger.error(f"Failed to quarantine PDF {pdf_file.pdf_hash}: {e}")
    # Reported as quarantined even on failure to prevent further processing.
    return True