endoreg-db 0.8.4.4__py3-none-any.whl → 0.8.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (372)
  1. endoreg_db/authz/auth.py +74 -0
  2. endoreg_db/authz/backends.py +168 -0
  3. endoreg_db/authz/management/commands/list_routes.py +18 -0
  4. endoreg_db/authz/middleware.py +83 -0
  5. endoreg_db/authz/permissions.py +127 -0
  6. endoreg_db/authz/policy.py +218 -0
  7. endoreg_db/authz/views_auth.py +66 -0
  8. endoreg_db/config/env.py +13 -8
  9. endoreg_db/data/__init__.py +8 -31
  10. endoreg_db/data/_examples/disease.yaml +55 -0
  11. endoreg_db/data/_examples/disease_classification.yaml +13 -0
  12. endoreg_db/data/_examples/disease_classification_choice.yaml +62 -0
  13. endoreg_db/data/_examples/event.yaml +64 -0
  14. endoreg_db/data/_examples/examination.yaml +72 -0
  15. endoreg_db/data/_examples/finding/anatomy_colon.yaml +128 -0
  16. endoreg_db/data/_examples/finding/colonoscopy.yaml +40 -0
  17. endoreg_db/data/_examples/finding/colonoscopy_bowel_prep.yaml +56 -0
  18. endoreg_db/data/_examples/finding/complication.yaml +16 -0
  19. endoreg_db/data/_examples/finding/data.yaml +105 -0
  20. endoreg_db/data/_examples/finding/examination_setting.yaml +16 -0
  21. endoreg_db/data/_examples/finding/medication_related.yaml +18 -0
  22. endoreg_db/data/_examples/finding/outcome.yaml +12 -0
  23. endoreg_db/data/_examples/finding_classification/colonoscopy_bowel_preparation.yaml +68 -0
  24. endoreg_db/data/_examples/finding_classification/colonoscopy_jnet.yaml +22 -0
  25. endoreg_db/data/_examples/finding_classification/colonoscopy_kudo.yaml +25 -0
  26. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_circularity.yaml +20 -0
  27. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_planarity.yaml +24 -0
  28. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_size.yaml +68 -0
  29. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_surface.yaml +20 -0
  30. endoreg_db/data/_examples/finding_classification/colonoscopy_location.yaml +80 -0
  31. endoreg_db/data/_examples/finding_classification/colonoscopy_lst.yaml +21 -0
  32. endoreg_db/data/_examples/finding_classification/colonoscopy_nice.yaml +20 -0
  33. endoreg_db/data/_examples/finding_classification/colonoscopy_paris.yaml +26 -0
  34. endoreg_db/data/_examples/finding_classification/colonoscopy_sano.yaml +22 -0
  35. endoreg_db/data/_examples/finding_classification/colonoscopy_summary.yaml +53 -0
  36. endoreg_db/data/_examples/finding_classification/complication_generic.yaml +25 -0
  37. endoreg_db/data/_examples/finding_classification/examination_setting_generic.yaml +40 -0
  38. endoreg_db/data/_examples/finding_classification/histology_colo.yaml +51 -0
  39. endoreg_db/data/_examples/finding_classification/intervention_required.yaml +26 -0
  40. endoreg_db/data/_examples/finding_classification/medication_related.yaml +23 -0
  41. endoreg_db/data/_examples/finding_classification/visualized.yaml +33 -0
  42. endoreg_db/data/_examples/finding_classification_choice/bowel_preparation.yaml +78 -0
  43. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_circularity_default.yaml +32 -0
  44. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_jnet.yaml +15 -0
  45. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_kudo.yaml +23 -0
  46. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_lst.yaml +15 -0
  47. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_nice.yaml +17 -0
  48. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_paris.yaml +57 -0
  49. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_planarity_default.yaml +49 -0
  50. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_sano.yaml +14 -0
  51. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_surface_intact_default.yaml +36 -0
  52. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_location.yaml +229 -0
  53. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_not_complete_reason.yaml +19 -0
  54. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_size.yaml +82 -0
  55. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +15 -0
  56. endoreg_db/data/_examples/finding_classification_choice/complication_generic_types.yaml +15 -0
  57. endoreg_db/data/_examples/finding_classification_choice/examination_setting_generic_types.yaml +15 -0
  58. endoreg_db/data/_examples/finding_classification_choice/histology.yaml +24 -0
  59. endoreg_db/data/_examples/finding_classification_choice/histology_polyp.yaml +20 -0
  60. endoreg_db/data/_examples/finding_classification_choice/outcome.yaml +19 -0
  61. endoreg_db/data/_examples/finding_classification_choice/yes_no_na.yaml +11 -0
  62. endoreg_db/data/_examples/finding_classification_type/colonoscopy_basic.yaml +48 -0
  63. endoreg_db/data/_examples/finding_intervention/endoscopy.yaml +43 -0
  64. endoreg_db/data/_examples/finding_intervention/endoscopy_colonoscopy.yaml +168 -0
  65. endoreg_db/data/_examples/finding_intervention/endoscopy_egd.yaml +128 -0
  66. endoreg_db/data/_examples/finding_intervention/endoscopy_ercp.yaml +32 -0
  67. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_lower.yaml +9 -0
  68. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_upper.yaml +36 -0
  69. endoreg_db/data/_examples/finding_intervention_type/endoscopy.yaml +15 -0
  70. endoreg_db/data/_examples/finding_type/data.yaml +43 -0
  71. endoreg_db/data/_examples/requirement/age.yaml +26 -0
  72. endoreg_db/data/_examples/requirement/colonoscopy_baseline_austria.yaml +45 -0
  73. endoreg_db/data/_examples/requirement/disease_cardiovascular.yaml +79 -0
  74. endoreg_db/data/_examples/requirement/disease_classification_choice_cardiovascular.yaml +41 -0
  75. endoreg_db/data/_examples/requirement/disease_hepatology.yaml +12 -0
  76. endoreg_db/data/_examples/requirement/disease_misc.yaml +12 -0
  77. endoreg_db/data/_examples/requirement/disease_renal.yaml +96 -0
  78. endoreg_db/data/_examples/requirement/endoscopy_bleeding_risk.yaml +59 -0
  79. endoreg_db/data/_examples/requirement/event_cardiology.yaml +251 -0
  80. endoreg_db/data/_examples/requirement/event_requirements.yaml +145 -0
  81. endoreg_db/data/_examples/requirement/finding_colon_polyp.yaml +50 -0
  82. endoreg_db/data/_examples/requirement/gender.yaml +25 -0
  83. endoreg_db/data/_examples/requirement/lab_value.yaml +441 -0
  84. endoreg_db/data/_examples/requirement/medication.yaml +93 -0
  85. endoreg_db/data/_examples/requirement_operator/age.yaml +13 -0
  86. endoreg_db/data/_examples/requirement_operator/lab_operators.yaml +129 -0
  87. endoreg_db/data/_examples/requirement_operator/model_operators.yaml +96 -0
  88. endoreg_db/data/_examples/requirement_set/01_endoscopy_generic.yaml +48 -0
  89. endoreg_db/data/_examples/requirement_set/colonoscopy_austria_screening.yaml +57 -0
  90. endoreg_db/data/_examples/yaml_examples.xlsx +0 -0
  91. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +4 -3
  92. endoreg_db/data/event_classification/data.yaml +4 -0
  93. endoreg_db/data/event_classification_choice/data.yaml +9 -0
  94. endoreg_db/data/finding_classification/colonoscopy_bowel_preparation.yaml +43 -70
  95. endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +22 -52
  96. endoreg_db/data/finding_classification/colonoscopy_location.yaml +31 -62
  97. endoreg_db/data/finding_classification/histology_colo.yaml +28 -36
  98. endoreg_db/data/requirement/colon_polyp_intervention.yaml +49 -0
  99. endoreg_db/data/requirement/coloreg_colon_polyp.yaml +49 -0
  100. endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +31 -12
  101. endoreg_db/data/requirement_set/01_laboratory.yaml +13 -0
  102. endoreg_db/data/requirement_set/02_endoscopy_bleeding_risk.yaml +46 -0
  103. endoreg_db/data/requirement_set/90_coloreg.yaml +178 -0
  104. endoreg_db/data/requirement_set/_old_ +109 -0
  105. endoreg_db/data/requirement_set_type/data.yaml +21 -0
  106. endoreg_db/data/setup_config.yaml +4 -4
  107. endoreg_db/data/tag/requirement_set_tags.yaml +21 -0
  108. endoreg_db/exceptions.py +5 -2
  109. endoreg_db/helpers/data_loader.py +1 -1
  110. endoreg_db/management/commands/create_model_meta_from_huggingface.py +21 -10
  111. endoreg_db/management/commands/create_multilabel_model_meta.py +299 -129
  112. endoreg_db/management/commands/import_video.py +9 -10
  113. endoreg_db/management/commands/import_video_with_classification.py +1 -1
  114. endoreg_db/management/commands/init_default_ai_model.py +1 -1
  115. endoreg_db/management/commands/list_routes.py +18 -0
  116. endoreg_db/management/commands/load_ai_model_data.py +2 -1
  117. endoreg_db/management/commands/load_center_data.py +12 -12
  118. endoreg_db/management/commands/load_requirement_data.py +60 -31
  119. endoreg_db/management/commands/load_requirement_set_tags.py +95 -0
  120. endoreg_db/management/commands/setup_endoreg_db.py +14 -10
  121. endoreg_db/management/commands/storage_management.py +271 -203
  122. endoreg_db/migrations/0001_initial.py +1799 -1300
  123. endoreg_db/migrations/0002_requirementset_depends_on.py +18 -0
  124. endoreg_db/migrations/_old/0001_initial.py +1857 -0
  125. endoreg_db/migrations/_old/0004_employee_city_employee_post_code_employee_street_and_more.py +68 -0
  126. endoreg_db/migrations/_old/0004_remove_casetemplate_rules_and_more.py +77 -0
  127. endoreg_db/migrations/_old/0005_merge_20251111_1003.py +14 -0
  128. endoreg_db/migrations/_old/0006_sensitivemeta_anonymized_text_and_more.py +68 -0
  129. endoreg_db/migrations/_old/0007_remove_rule_attribute_dtype_remove_rule_rule_type_and_more.py +89 -0
  130. endoreg_db/migrations/_old/0008_remove_event_event_classification_and_more.py +27 -0
  131. endoreg_db/migrations/_old/0009_alter_modelmeta_options_and_more.py +21 -0
  132. endoreg_db/models/__init__.py +78 -123
  133. endoreg_db/models/administration/__init__.py +21 -42
  134. endoreg_db/models/administration/ai/active_model.py +2 -2
  135. endoreg_db/models/administration/ai/ai_model.py +7 -6
  136. endoreg_db/models/administration/case/__init__.py +1 -15
  137. endoreg_db/models/administration/case/case.py +3 -3
  138. endoreg_db/models/administration/case/case_template/__init__.py +2 -14
  139. endoreg_db/models/administration/case/case_template/case_template.py +2 -124
  140. endoreg_db/models/administration/case/case_template/case_template_rule.py +2 -268
  141. endoreg_db/models/administration/case/case_template/case_template_rule_value.py +2 -85
  142. endoreg_db/models/administration/case/case_template/case_template_type.py +2 -25
  143. endoreg_db/models/administration/center/center.py +33 -19
  144. endoreg_db/models/administration/center/center_product.py +12 -9
  145. endoreg_db/models/administration/center/center_resource.py +25 -19
  146. endoreg_db/models/administration/center/center_shift.py +21 -17
  147. endoreg_db/models/administration/center/center_waste.py +16 -8
  148. endoreg_db/models/administration/person/__init__.py +2 -0
  149. endoreg_db/models/administration/person/employee/employee.py +10 -5
  150. endoreg_db/models/administration/person/employee/employee_qualification.py +9 -4
  151. endoreg_db/models/administration/person/employee/employee_type.py +12 -6
  152. endoreg_db/models/administration/person/examiner/examiner.py +13 -11
  153. endoreg_db/models/administration/person/patient/__init__.py +2 -0
  154. endoreg_db/models/administration/person/patient/patient.py +103 -100
  155. endoreg_db/models/administration/person/patient/patient_external_id.py +37 -0
  156. endoreg_db/models/administration/person/person.py +4 -0
  157. endoreg_db/models/administration/person/profession/__init__.py +8 -4
  158. endoreg_db/models/administration/person/user/portal_user_information.py +11 -7
  159. endoreg_db/models/administration/product/product.py +20 -15
  160. endoreg_db/models/administration/product/product_material.py +17 -18
  161. endoreg_db/models/administration/product/product_weight.py +12 -8
  162. endoreg_db/models/administration/product/reference_product.py +23 -55
  163. endoreg_db/models/administration/qualification/qualification.py +7 -3
  164. endoreg_db/models/administration/qualification/qualification_type.py +7 -3
  165. endoreg_db/models/administration/shift/scheduled_days.py +8 -5
  166. endoreg_db/models/administration/shift/shift.py +16 -12
  167. endoreg_db/models/administration/shift/shift_type.py +23 -31
  168. endoreg_db/models/label/__init__.py +7 -8
  169. endoreg_db/models/label/annotation/image_classification.py +10 -9
  170. endoreg_db/models/label/annotation/video_segmentation_annotation.py +8 -5
  171. endoreg_db/models/label/label.py +15 -15
  172. endoreg_db/models/label/label_set.py +19 -6
  173. endoreg_db/models/label/label_type.py +1 -1
  174. endoreg_db/models/label/label_video_segment/_create_from_video.py +5 -8
  175. endoreg_db/models/label/label_video_segment/label_video_segment.py +76 -102
  176. endoreg_db/models/label/video_segmentation_label.py +4 -0
  177. endoreg_db/models/label/video_segmentation_labelset.py +4 -3
  178. endoreg_db/models/media/frame/frame.py +22 -22
  179. endoreg_db/models/media/pdf/raw_pdf.py +249 -177
  180. endoreg_db/models/media/pdf/report_file.py +25 -29
  181. endoreg_db/models/media/pdf/report_reader/report_reader_config.py +30 -46
  182. endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +23 -7
  183. endoreg_db/models/media/video/__init__.py +1 -0
  184. endoreg_db/models/media/video/create_from_file.py +48 -56
  185. endoreg_db/models/media/video/pipe_1.py +30 -33
  186. endoreg_db/models/media/video/pipe_2.py +8 -9
  187. endoreg_db/models/media/video/video_file.py +359 -204
  188. endoreg_db/models/media/video/video_file_ai.py +288 -74
  189. endoreg_db/models/media/video/video_file_anonymize.py +38 -38
  190. endoreg_db/models/media/video/video_file_frames/__init__.py +3 -1
  191. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -8
  192. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +7 -9
  193. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +9 -8
  194. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +38 -45
  195. endoreg_db/models/media/video/video_file_frames/_get_frame.py +6 -8
  196. endoreg_db/models/media/video/video_file_frames/_get_frame_number.py +4 -18
  197. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -3
  198. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +7 -6
  199. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +6 -8
  200. endoreg_db/models/media/video/video_file_frames/_get_frames.py +6 -8
  201. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +15 -25
  202. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +26 -23
  203. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +23 -14
  204. endoreg_db/models/media/video/video_file_io.py +109 -62
  205. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +3 -3
  206. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +5 -3
  207. endoreg_db/models/media/video/video_file_meta/get_fps.py +37 -34
  208. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +19 -25
  209. endoreg_db/models/media/video/video_file_meta/text_meta.py +41 -38
  210. endoreg_db/models/media/video/video_file_meta/video_meta.py +14 -7
  211. endoreg_db/models/media/video/video_file_segments.py +24 -17
  212. endoreg_db/models/media/video/video_metadata.py +19 -35
  213. endoreg_db/models/media/video/video_processing.py +96 -95
  214. endoreg_db/models/medical/contraindication/__init__.py +13 -3
  215. endoreg_db/models/medical/disease.py +22 -16
  216. endoreg_db/models/medical/event.py +31 -18
  217. endoreg_db/models/medical/examination/__init__.py +13 -6
  218. endoreg_db/models/medical/examination/examination.py +17 -18
  219. endoreg_db/models/medical/examination/examination_indication.py +26 -25
  220. endoreg_db/models/medical/examination/examination_time.py +16 -6
  221. endoreg_db/models/medical/examination/examination_time_type.py +9 -6
  222. endoreg_db/models/medical/examination/examination_type.py +3 -4
  223. endoreg_db/models/medical/finding/finding.py +38 -39
  224. endoreg_db/models/medical/finding/finding_classification.py +37 -48
  225. endoreg_db/models/medical/finding/finding_intervention.py +27 -22
  226. endoreg_db/models/medical/finding/finding_type.py +13 -12
  227. endoreg_db/models/medical/hardware/endoscope.py +20 -26
  228. endoreg_db/models/medical/hardware/endoscopy_processor.py +2 -2
  229. endoreg_db/models/medical/laboratory/lab_value.py +62 -91
  230. endoreg_db/models/medical/medication/medication.py +22 -10
  231. endoreg_db/models/medical/medication/medication_indication.py +29 -3
  232. endoreg_db/models/medical/medication/medication_indication_type.py +25 -14
  233. endoreg_db/models/medical/medication/medication_intake_time.py +31 -19
  234. endoreg_db/models/medical/medication/medication_schedule.py +27 -16
  235. endoreg_db/models/medical/organ/__init__.py +15 -12
  236. endoreg_db/models/medical/patient/medication_examples.py +1 -5
  237. endoreg_db/models/medical/patient/patient_disease.py +20 -23
  238. endoreg_db/models/medical/patient/patient_event.py +19 -22
  239. endoreg_db/models/medical/patient/patient_examination.py +48 -54
  240. endoreg_db/models/medical/patient/patient_examination_indication.py +16 -14
  241. endoreg_db/models/medical/patient/patient_finding.py +122 -139
  242. endoreg_db/models/medical/patient/patient_finding_classification.py +44 -49
  243. endoreg_db/models/medical/patient/patient_finding_intervention.py +8 -19
  244. endoreg_db/models/medical/patient/patient_lab_sample.py +28 -23
  245. endoreg_db/models/medical/patient/patient_lab_value.py +82 -89
  246. endoreg_db/models/medical/patient/patient_medication.py +27 -38
  247. endoreg_db/models/medical/patient/patient_medication_schedule.py +28 -36
  248. endoreg_db/models/medical/risk/risk.py +7 -6
  249. endoreg_db/models/medical/risk/risk_type.py +8 -5
  250. endoreg_db/models/metadata/model_meta.py +60 -29
  251. endoreg_db/models/metadata/model_meta_logic.py +139 -18
  252. endoreg_db/models/metadata/pdf_meta.py +19 -24
  253. endoreg_db/models/metadata/sensitive_meta.py +102 -85
  254. endoreg_db/models/metadata/sensitive_meta_logic.py +383 -43
  255. endoreg_db/models/metadata/video_meta.py +51 -31
  256. endoreg_db/models/metadata/video_prediction_logic.py +16 -23
  257. endoreg_db/models/metadata/video_prediction_meta.py +29 -33
  258. endoreg_db/models/other/distribution/date_value_distribution.py +89 -29
  259. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +21 -5
  260. endoreg_db/models/other/distribution/numeric_value_distribution.py +114 -53
  261. endoreg_db/models/other/distribution/single_categorical_value_distribution.py +4 -3
  262. endoreg_db/models/other/emission/emission_factor.py +18 -8
  263. endoreg_db/models/other/gender.py +10 -5
  264. endoreg_db/models/other/information_source.py +25 -25
  265. endoreg_db/models/other/material.py +9 -5
  266. endoreg_db/models/other/resource.py +6 -4
  267. endoreg_db/models/other/tag.py +10 -5
  268. endoreg_db/models/other/transport_route.py +13 -8
  269. endoreg_db/models/other/unit.py +10 -6
  270. endoreg_db/models/other/waste.py +6 -5
  271. endoreg_db/models/requirement/requirement.py +580 -272
  272. endoreg_db/models/requirement/requirement_error.py +85 -0
  273. endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +268 -0
  274. endoreg_db/models/requirement/requirement_evaluation/operator_evaluation_models.py +3 -6
  275. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +90 -64
  276. endoreg_db/models/requirement/requirement_operator.py +36 -33
  277. endoreg_db/models/requirement/requirement_set.py +74 -57
  278. endoreg_db/models/state/__init__.py +4 -4
  279. endoreg_db/models/state/abstract.py +2 -2
  280. endoreg_db/models/state/anonymization.py +12 -0
  281. endoreg_db/models/state/audit_ledger.py +46 -47
  282. endoreg_db/models/state/label_video_segment.py +9 -0
  283. endoreg_db/models/state/raw_pdf.py +40 -46
  284. endoreg_db/models/state/sensitive_meta.py +6 -2
  285. endoreg_db/models/state/video.py +58 -53
  286. endoreg_db/models/upload_job.py +32 -55
  287. endoreg_db/models/utils.py +1 -2
  288. endoreg_db/root_urls.py +21 -2
  289. endoreg_db/serializers/__init__.py +26 -57
  290. endoreg_db/serializers/anonymization.py +18 -10
  291. endoreg_db/serializers/meta/report_meta.py +1 -1
  292. endoreg_db/serializers/meta/sensitive_meta_detail.py +63 -118
  293. endoreg_db/serializers/misc/__init__.py +1 -1
  294. endoreg_db/serializers/misc/file_overview.py +33 -91
  295. endoreg_db/serializers/misc/{vop_patient_data.py → sensitive_patient_data.py} +1 -1
  296. endoreg_db/serializers/requirements/requirement_sets.py +92 -22
  297. endoreg_db/serializers/video/segmentation.py +2 -1
  298. endoreg_db/serializers/video/video_processing_history.py +20 -5
  299. endoreg_db/serializers/video_examination.py +198 -0
  300. endoreg_db/services/anonymization.py +75 -73
  301. endoreg_db/services/lookup_service.py +256 -73
  302. endoreg_db/services/lookup_store.py +174 -30
  303. endoreg_db/services/pdf_import.py +711 -310
  304. endoreg_db/services/storage_aware_video_processor.py +140 -114
  305. endoreg_db/services/video_import.py +266 -117
  306. endoreg_db/urls/__init__.py +27 -27
  307. endoreg_db/urls/label_video_segments.py +2 -0
  308. endoreg_db/urls/media.py +108 -66
  309. endoreg_db/urls/root_urls.py +29 -0
  310. endoreg_db/utils/__init__.py +15 -5
  311. endoreg_db/utils/ai/multilabel_classification_net.py +116 -20
  312. endoreg_db/utils/case_generator/__init__.py +3 -0
  313. endoreg_db/utils/dataloader.py +88 -16
  314. endoreg_db/utils/defaults/set_default_center.py +32 -0
  315. endoreg_db/utils/names.py +22 -16
  316. endoreg_db/utils/permissions.py +2 -1
  317. endoreg_db/utils/pipelines/process_video_dir.py +1 -1
  318. endoreg_db/utils/requirement_operator_logic/model_evaluators.py +414 -127
  319. endoreg_db/utils/setup_config.py +8 -5
  320. endoreg_db/utils/storage.py +115 -0
  321. endoreg_db/utils/validate_endo_roi.py +8 -2
  322. endoreg_db/utils/video/ffmpeg_wrapper.py +184 -188
  323. endoreg_db/views/__init__.py +5 -12
  324. endoreg_db/views/anonymization/media_management.py +198 -163
  325. endoreg_db/views/anonymization/overview.py +4 -1
  326. endoreg_db/views/anonymization/validate.py +174 -40
  327. endoreg_db/views/media/__init__.py +2 -0
  328. endoreg_db/views/media/pdf_media.py +131 -150
  329. endoreg_db/views/media/sensitive_metadata.py +46 -6
  330. endoreg_db/views/media/video_media.py +89 -82
  331. endoreg_db/views/media/video_segments.py +187 -260
  332. endoreg_db/views/meta/sensitive_meta_detail.py +0 -63
  333. endoreg_db/views/patient/patient.py +5 -4
  334. endoreg_db/views/pdf/__init__.py +5 -8
  335. endoreg_db/views/pdf/pdf_stream.py +186 -0
  336. endoreg_db/views/pdf/pdf_stream_views.py +0 -127
  337. endoreg_db/views/pdf/reimport.py +86 -91
  338. endoreg_db/views/requirement/evaluate.py +188 -187
  339. endoreg_db/views/requirement/lookup.py +186 -288
  340. endoreg_db/views/requirement/requirement_utils.py +89 -0
  341. endoreg_db/views/video/__init__.py +0 -4
  342. endoreg_db/views/video/correction.py +2 -2
  343. endoreg_db/views/video/video_examination_viewset.py +202 -289
  344. {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/METADATA +7 -3
  345. {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/RECORD +350 -255
  346. endoreg_db/models/administration/permissions/__init__.py +0 -44
  347. endoreg_db/models/media/video/refactor_plan.md +0 -0
  348. endoreg_db/models/media/video/video_file_frames.py +0 -0
  349. endoreg_db/models/metadata/frame_ocr_result.py +0 -0
  350. endoreg_db/models/rule/__init__.py +0 -13
  351. endoreg_db/models/rule/rule.py +0 -27
  352. endoreg_db/models/rule/rule_applicator.py +0 -224
  353. endoreg_db/models/rule/rule_attribute_dtype.py +0 -17
  354. endoreg_db/models/rule/rule_type.py +0 -20
  355. endoreg_db/models/rule/ruleset.py +0 -17
  356. endoreg_db/serializers/video/video_metadata.py +0 -105
  357. endoreg_db/urls/report.py +0 -48
  358. endoreg_db/urls/video.py +0 -61
  359. endoreg_db/utils/case_generator/case_generator.py +0 -159
  360. endoreg_db/utils/case_generator/utils.py +0 -30
  361. endoreg_db/views/pdf/pdf_media.py +0 -239
  362. endoreg_db/views/report/__init__.py +0 -9
  363. endoreg_db/views/report/report_list.py +0 -112
  364. endoreg_db/views/report/report_with_secure_url.py +0 -28
  365. endoreg_db/views/report/start_examination.py +0 -7
  366. endoreg_db/views/video/video_media.py +0 -158
  367. endoreg_db/views.py +0 -0
  368. /endoreg_db/data/{requirement_set → _examples/requirement_set}/endoscopy_bleeding_risk.yaml +0 -0
  369. /endoreg_db/migrations/{0002_add_video_correction_models.py → _old/0002_add_video_correction_models.py} +0 -0
  370. /endoreg_db/migrations/{0003_add_center_display_name.py → _old/0003_add_center_display_name.py} +0 -0
  371. {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/WHEEL +0 -0
  372. {endoreg_db-0.8.4.4.dist-info → endoreg_db-0.8.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -4,38 +4,39 @@
4
4
  # Class contains classmethod to create object from pdf file
5
5
  # Objects contain methods to extract text, extract metadata from text, and anonymize text from the PDF file using the agl_report_reader.ReportReader class
6
6
  # ------------------------------------------------------------------------------
7
- import os
8
- from django.db import models
7
+ from typing import TYPE_CHECKING, Optional, cast
8
+
9
9
  from django.core.exceptions import ValidationError
10
- from django.core.validators import FileExtensionValidator
11
10
  from django.core.files import File
12
- from numpy import isin # Import Django File
11
+ from django.core.validators import FileExtensionValidator
12
+ from django.db import models
13
+
13
14
  from endoreg_db.utils.file_operations import get_uuid_filename
14
- from typing import TYPE_CHECKING, Optional, Union
15
- # Use the specific paths from the centralized paths module
16
- from ...utils import PDF_DIR
17
15
  from endoreg_db.utils.hashs import get_pdf_hash
16
+ from endoreg_db.utils.paths import PDF_DIR
17
+ from endoreg_db.utils.storage import (
18
+ delete_field_file,
19
+ ensure_local_file,
20
+ file_exists,
21
+ save_local_file,
22
+ )
18
23
 
19
24
  if TYPE_CHECKING:
20
- from endoreg_db.models.administration.person import (
21
- Patient,
22
- Examiner,
23
- )
24
- from .report_file import AnonymExaminationReport
25
- from ...medical.patient import PatientExamination
26
- from ...administration import Center
27
- from ...metadata.pdf_meta import PdfType
28
- from ...state import RawPdfState
29
- from ...metadata import SensitiveMeta
25
+ from django.db.models.fields.files import FieldFile
26
+
27
+ from endoreg_db.models.state import RawPdfState
30
28
 
31
29
  # setup logging to pdf_import.log
32
30
  import logging
33
-
34
31
  from pathlib import Path
35
32
 
33
+ from ...metadata import SensitiveMeta
34
+
36
35
  logger = logging.getLogger("raw_pdf")
37
36
 
37
+
38
38
  class RawPdfFile(models.Model):
39
+ objects = models.Manager()
39
40
  # Fields from AbstractPdfFile
40
41
  pdf_hash = models.CharField(max_length=255, unique=True)
41
42
  pdf_type = models.ForeignKey(
@@ -49,72 +50,128 @@ class RawPdfFile(models.Model):
49
50
  on_delete=models.SET_NULL,
50
51
  blank=True,
51
52
  null=True,
52
- ) # type: ignore
53
+ )
53
54
  examination = models.ForeignKey(
54
55
  "PatientExamination",
55
56
  on_delete=models.SET_NULL,
56
57
  blank=True,
57
58
  null=True,
58
59
  related_name="raw_pdf_files",
59
- ) # type: ignore
60
+ )
60
61
  examiner = models.ForeignKey(
61
62
  "Examiner",
62
63
  on_delete=models.SET_NULL,
63
64
  blank=True,
64
65
  null=True,
65
- ) # type: ignore
66
+ )
66
67
  text = models.TextField(blank=True, null=True)
67
68
  date_created = models.DateTimeField(auto_now_add=True)
68
69
  date_modified = models.DateTimeField(auto_now=True)
69
70
  anonymized = models.BooleanField(default=False, help_text="True if the PDF has been anonymized.")
70
-
71
- # Fields specific to RawPdfFile (keeping existing related_names)
72
71
  file = models.FileField(
73
72
  # Use the relative path from the specific PDF_DIR
74
73
  upload_to=PDF_DIR.name,
75
74
  validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
76
- ) # type: ignore
77
-
75
+ )
78
76
  anonymized_file = models.FileField(
79
77
  upload_to=PDF_DIR.name,
80
78
  validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
81
79
  null=True,
82
80
  blank=True,
83
- ) # type: ignore
84
-
81
+ )
85
82
  state = models.OneToOneField(
86
83
  "RawPdfState",
87
84
  on_delete=models.SET_NULL,
88
85
  blank=True,
89
86
  null=True,
90
87
  related_name="raw_pdf_file",
91
- ) # type: ignore
92
-
93
- objects = models.Manager()
94
-
88
+ )
89
+ patient = models.ForeignKey(
90
+ "Patient",
91
+ on_delete=models.SET_NULL,
92
+ blank=True,
93
+ null=True,
94
+ related_name="raw_pdf_files",
95
+ )
96
+ sensitive_meta = models.ForeignKey(
97
+ "SensitiveMeta",
98
+ on_delete=models.SET_NULL,
99
+ related_name="raw_pdf_files",
100
+ null=True,
101
+ blank=True,
102
+ )
103
+ state_report_processing_required = models.BooleanField(default=True)
104
+ state_report_processed = models.BooleanField(default=False)
105
+ raw_meta = models.JSONField(blank=True, null=True)
106
+ anonym_examination_report = models.OneToOneField(
107
+ "AnonymExaminationReport",
108
+ on_delete=models.SET_NULL,
109
+ blank=True,
110
+ null=True,
111
+ related_name="raw_pdf_file",
112
+ )
113
+ anonymized_text = models.TextField(blank=True, null=True)
114
+
115
+ # Type hints are needed here; improve them and use the correct Django types
116
+ if TYPE_CHECKING:
117
+ from endoreg_db.models import (
118
+ AnonymExaminationReport,
119
+ Center,
120
+ Examiner,
121
+ Patient,
122
+ PatientExamination,
123
+ RawPdfState,
124
+ SensitiveMeta,
125
+ )
126
+
127
+ center: models.ForeignKey["Center | None"]
128
+ examination: models.ForeignKey["PatientExamination | None"]
129
+ examiner: models.ForeignKey["Examiner | None"]
130
+ state: models.ForeignKey["RawPdfState | None"]
131
+ patient: models.ForeignKey["Patient | None"]
132
+ sensitive_meta: models.ForeignKey["SensitiveMeta | None"]
133
+ anonym_examination_report: models.OneToOneField["AnonymExaminationReport | None"]
134
+ file = cast(FieldFile, file)
135
+ anonymized_file = cast(FieldFile, anonymized_file)
136
+
95
137
  @property
96
- def file_path(self) -> Path|None:
138
+ def uuid(self):
139
+ """
140
+ Compatibility property - returns pdf_hash as UUID-like identifier.
141
+
142
+ Note: RawPdfFile uses pdf_hash instead of UUID for identification.
143
+ This property exists for API backward compatibility.
144
+ """
145
+ return self.pdf_hash
146
+
147
+ @property
148
+ def file_path(self) -> Path | None:
97
149
  """
98
150
  Returns the file path of the stored PDF file if available; otherwise, returns None.
99
151
  """
152
+ from django.db.models.fields.files import FieldFile
153
+
100
154
  # assert self.file has path attribute
101
- assert isinstance(self.file, models.FieldFile)
155
+ assert isinstance(self.file, FieldFile)
102
156
  if self.file and self.file.name:
103
157
  try:
104
158
  return Path(self.file.path)
105
159
  except (ValueError, AttributeError, NotImplementedError):
106
160
  return None
107
161
  return None
108
-
162
+
109
163
  def set_file_path(self, file_path: Path):
110
164
  """
111
165
  Sets the file path of the stored PDF file.
112
166
  """
113
- self.file = File(file_path) # type: ignore
114
- self.save(update_fields=['file'])
167
+ if not file_path.exists():
168
+ raise FileNotFoundError(f"File path does not exist: {file_path}")
169
+
170
+ save_local_file(self.file, file_path, name=file_path.name, save=False)
171
+ self.save(update_fields=["file"])
115
172
 
116
173
  @property
117
- def anonymized_file_path(self) -> Path|None:
174
+ def anonymized_file_path(self) -> Path | None:
118
175
  """
119
176
  Returns the file path of the anonymized PDF file if available; otherwise, returns None.
120
177
  """
@@ -124,13 +181,76 @@ class RawPdfFile(models.Model):
124
181
  except (ValueError, AttributeError, NotImplementedError):
125
182
  return None
126
183
  return None
127
-
184
+
128
185
  def set_anonymized_file_path(self, file_path: Path):
129
186
  """
130
187
  Sets the file path of the anonymized PDF file.
131
188
  """
132
- self.anonymized_file = File(file_path) # type: ignore
133
- self.save(update_fields=['anonymized_file'])
189
+ if not file_path.exists():
190
+ raise FileNotFoundError(f"File path does not exist: {file_path}")
191
+
192
+ save_local_file(self.anonymized_file, file_path, name=file_path.name, save=False)
193
+ self.save(update_fields=["anonymized_file"])
194
+
195
+ def get_raw_file_path(self) -> Optional[Path]:
196
+ """
197
+ Get the path to the raw PDF file, searching common locations.
198
+
199
+ This method attempts to find the original raw PDF file by checking:
200
+ 1. Direct hash-based path in raw_pdfs/
201
+ 2. Scanning raw_pdfs/ directory for files matching the hash
202
+ 3. Checking the file field if it exists
203
+
204
+ Returns:
205
+ Path to raw file if it exists, None otherwise
206
+ """
207
+ from django.conf import settings
208
+
209
+ # Check if file field already points to a valid file
210
+ if self.file and self.file.name:
211
+ try:
212
+ file_path = Path(self.file.path)
213
+ if file_path.exists():
214
+ logger.debug(f"Found raw PDF via file field: {file_path}")
215
+ return file_path
216
+ except (ValueError, AttributeError, NotImplementedError):
217
+ pass
218
+
219
+ # Define potential raw directories
220
+ raw_dirs = [
221
+ PDF_DIR / "sensitive", # Files might be in sensitive dir
222
+ Path(settings.BASE_DIR) / "data" / "raw_pdfs",
223
+ Path(settings.BASE_DIR) / "data" / "pdfs" / "raw",
224
+ PDF_DIR, # General PDF directory
225
+ ]
226
+
227
+ # Check direct hash-based name in each directory
228
+ for raw_dir in raw_dirs:
229
+ if not raw_dir.exists():
230
+ continue
231
+
232
+ hash_path = raw_dir / f"{self.pdf_hash}.pdf"
233
+ if hash_path.exists():
234
+ logger.debug(f"Found raw PDF at: {hash_path}")
235
+ return hash_path
236
+
237
+ # Scan directories for matching hash
238
+ for raw_dir in raw_dirs:
239
+ if not raw_dir.exists():
240
+ continue
241
+
242
+ for file_path in raw_dir.glob("*.pdf"):
243
+ try:
244
+ file_hash = get_pdf_hash(file_path)
245
+ if file_hash == self.pdf_hash:
246
+ logger.debug(f"Found matching PDF by hash: {file_path}")
247
+ return file_path
248
+ except Exception as e:
249
+ logger.debug(f"Error checking {file_path}: {e}")
250
+ continue
251
+
252
+ logger.warning(f"No raw file found for PDF hash: {self.pdf_hash}")
253
+ return None
134
254
 
135
255
  @property
136
256
  def file_url(self):
@@ -141,7 +261,7 @@ class RawPdfFile(models.Model):
141
261
  return self.file.url if self.file and self.file.name else None
142
262
  except (ValueError, AttributeError):
143
263
  return None
144
-
264
+
145
265
  @property
146
266
  def anonymized_file_url(self):
147
267
  """
@@ -152,45 +272,6 @@ class RawPdfFile(models.Model):
152
272
  except (ValueError, AttributeError):
153
273
  return None
154
274
 
155
- patient = models.ForeignKey(
156
- "Patient",
157
- on_delete=models.SET_NULL,
158
- blank=True,
159
- null=True,
160
- related_name="raw_pdf_files",
161
- ) # type: ignore
162
- sensitive_meta = models.ForeignKey(
163
- "SensitiveMeta",
164
- on_delete=models.SET_NULL,
165
- related_name="raw_pdf_files",
166
- null=True,
167
- blank=True,
168
- ) # type: ignore
169
- state_report_processing_required = models.BooleanField(default=True)
170
- state_report_processed = models.BooleanField(default=False)
171
- raw_meta = models.JSONField(blank=True, null=True)
172
- anonym_examination_report = models.OneToOneField(
173
- "AnonymExaminationReport",
174
- on_delete=models.SET_NULL,
175
- blank=True,
176
- null=True,
177
- related_name="raw_pdf_file",
178
- ) # type: ignore
179
- anonymized_text = models.TextField(blank=True, null=True)
180
-
181
- # Type hinting is needed, improve and use correct django types
182
- if TYPE_CHECKING:
183
- file : Optional[Union[models.FieldFile,models.FileField]]
184
- anonymized_file : Optional[Union[models.FieldFile,models.FileField]]
185
- pdf_type: Optional[models.ForeignKey]
186
- examination: Optional[models.ForeignKey["PatientExamination"]]
187
- examiner: Optional[models.ForeignKey["Examiner"]]
188
- patient: Optional[models.ForeignKey["Patient"]]
189
- center: Optional[models.ForeignKey["Center"]]
190
- anonym_examination_report: Optional[models.OneToOneField["AnonymExaminationReport"]]
191
- sensitive_meta: Optional[models.ForeignKey["SensitiveMeta"]]
192
- state: Optional[models.ForeignKey["RawPdfState"]]
193
-
194
275
  def __str__(self):
195
276
  """
196
277
  Return a string representation of the RawPdfFile, including its PDF hash, type, and center.
@@ -201,72 +282,57 @@ class RawPdfFile(models.Model):
201
282
  def delete(self, *args, **kwargs):
202
283
  """
203
284
  Deletes the RawPdfFile instance from the database and removes the associated file from storage if it exists.
204
-
285
+
205
286
  This method ensures that the physical PDF file is deleted from the file system after the database record is removed. Logs warnings or errors if the file cannot be found or deleted.
206
287
  """
207
- # Call the original delete method first to remove DB record
208
- if self.file:
209
- try:
210
- if self.file_path:
211
- os.remove(Path(self.file_path))
212
- logger.info("Original file removed: %s", self.file)
213
- except Exception as e:
214
- logger.warning(f"Could not get file path for {self.file.name} before deletion: {e}")
215
- if self.anonymized_file:
216
- try:
217
- if self.anonymized_file_path:
218
- os.remove(Path(self.anonymized_file_path))
219
- logger.info("Anonymized file removed: %s", self.anonymized_file.name)
220
- except OSError as e:
221
- logger.error("Error removing anonymized file %s: %s", self.anonymized_file.name, e)
288
+ primary_name = self.file.name if self.file and self.file.name else None
289
+ anonymized_name = self.anonymized_file.name if self.anonymized_file and self.anonymized_file.name else None
222
290
 
223
- super().delete(*args, **kwargs)
291
+ if delete_field_file(self.file, missing_ok=True, save=False):
292
+ logger.info("Original file removed from storage: %s", primary_name)
293
+ if delete_field_file(self.anonymized_file, missing_ok=True, save=False):
294
+ logger.info("Anonymized file removed from storage: %s", anonymized_name)
224
295
 
296
+ super().delete(*args, **kwargs)
225
297
 
226
298
  def validate_metadata_annotation(self, extracted_data_dict: Optional[dict] = None) -> bool:
227
299
  """
228
300
  Validate the metadata of the RawPdf instance.
229
-
301
+
230
302
  Called after annotation in the frontend, this method deletes the associated active file, updates the sensitive meta data with the user annotated data.
231
303
  It also ensures the video file is properly saved after the metadata update.
232
304
  """
233
-
305
+
234
306
  if not self.sensitive_meta:
235
307
  logger.error("No sensitive meta data associated with this PDF file.")
236
308
  return False
237
-
309
+
238
310
  if not extracted_data_dict:
239
311
  logger.error("No extracted data provided for validation.")
240
312
  return False
241
-
242
- # Update sensitive meta with the provided data
243
- self.sensitive_meta.update_from_dict(extracted_data_dict)
244
-
313
+
314
+ if extracted_data_dict:
315
+ self.sensitive_meta.update_from_dict(extracted_data_dict)
316
+ else:
317
+ return False
318
+
245
319
  # Save the sensitive meta to ensure changes are persisted
246
320
  self.sensitive_meta.save()
247
-
321
+
248
322
  # Save the RawPdfFile instance to ensure all changes are saved
249
323
  self.save()
250
-
324
+
251
325
  logger.info(f"Metadata for PDF {self.pk} validated and updated successfully.")
252
-
253
- if self.file_path:
254
- try:
255
- os.unlink(self.file_path) # Delete the original file if it exists
256
- except OSError as e:
257
- logger.error(f"Error removing original file {self.file_path}: {e}")
258
326
 
259
- if self.anonymized_file_path:
260
- try:
261
- os.unlink(self.anonymized_file_path)
262
- except OSError as e:
263
- logger.error(f"Error removing anonymized file {self.anonymized_file_path}: {e}")
327
+ deleted_original = delete_field_file(self.file, missing_ok=True, save=False)
328
+ deleted_anonymized = delete_field_file(self.anonymized_file, missing_ok=True, save=False)
329
+ self.get_or_create_state().mark_anonymization_validated()
330
+
331
+ if deleted_original or deleted_anonymized:
332
+ self.save(update_fields=["file", "anonymized_file"]) # Persist cleared fields
264
333
 
265
- self.save() # Save the model to persist the cleared file fields
266
-
267
334
  logger.info(f"Files for PDF {self.pk} deleted successfully.")
268
335
  return True
269
-
270
336
 
271
337
  @classmethod
272
338
  def create_from_file_initialized(
@@ -277,18 +343,18 @@ class RawPdfFile(models.Model):
277
343
  ):
278
344
  """
279
345
  Creates a RawPdfFile instance from a file and center name, ensuring an associated RawPdfState exists.
280
-
346
+
281
347
  Parameters:
282
348
  file_path (Path): Path to the source PDF file.
283
349
  center_name (str): Name of the center to associate with the PDF.
284
350
  delete_source (bool): Whether to delete the source file after processing. Defaults to True.
285
-
351
+
286
352
  Returns:
287
353
  RawPdfFile: The created or retrieved RawPdfFile instance with an associated RawPdfState.
288
354
  """
289
355
  raw_pdf = cls.create_from_file(
290
356
  file_path=file_path,
291
- center_name=center_name,
357
+ center_name=center_name,
292
358
  delete_source=delete_source,
293
359
  )
294
360
  _state = raw_pdf.get_or_create_state()
@@ -305,18 +371,18 @@ class RawPdfFile(models.Model):
305
371
  ):
306
372
  """
307
373
  Creates or retrieves a RawPdfFile instance from a given PDF file path and center name.
308
-
374
+
309
375
  If a RawPdfFile with the same PDF hash already exists, verifies the file exists in storage and restores it if missing. Otherwise, creates a new RawPdfFile, assigns the file, and saves it to storage. Optionally deletes the source file after processing.
310
-
376
+
311
377
  Parameters:
312
378
  file_path (Path): Path to the source PDF file.
313
379
  center_name (str): Name of the center to associate with the file.
314
380
  save (bool, optional): Deprecated; saving occurs internally.
315
381
  delete_source (bool, optional): Whether to delete the source file after processing (default True).
316
-
382
+
317
383
  Returns:
318
384
  RawPdfFile: The created or retrieved RawPdfFile instance.
319
-
385
+
320
386
  Raises:
321
387
  FileNotFoundError: If the source file does not exist.
322
388
  Center.DoesNotExist: If the specified center is not found.
@@ -338,28 +404,41 @@ class RawPdfFile(models.Model):
338
404
  raise ValueError(f"Could not calculate hash for {file_path}") from e
339
405
 
340
406
  # 2. Check if record with this hash already exists
341
- existing_pdf_file = cls.objects.filter(pdf_hash=pdf_hash).first()
407
+ existing_pdf_file = cls.objects.filter(pdf_hash=pdf_hash).first()
342
408
  if existing_pdf_file:
343
- logger.warning("RawPdfFile with hash %s already exists (ID: %s)", pdf_hash, existing_pdf_file.pk)
409
+ logger.warning(
410
+ "RawPdfFile with hash %s already exists (ID: %s)",
411
+ pdf_hash,
412
+ existing_pdf_file.pk,
413
+ )
344
414
 
345
415
  # Verify physical file exists for the existing record
346
416
  try:
347
417
  if existing_pdf_file is not None and isinstance(existing_pdf_file, cls):
348
- # Use storage API to check existence
418
+ # Use storage API to check existence
349
419
  _file = existing_pdf_file.file
350
420
  assert _file is not None
351
421
  if not _file.storage.exists(_file.name):
352
- logger.warning("File for existing RawPdfFile %s not found in storage at %s. Attempting to restore from source %s", pdf_hash, _file.name, file_path)
422
+ logger.warning(
423
+ "File for existing RawPdfFile %s not found in storage at %s. Attempting to restore from source %s",
424
+ pdf_hash,
425
+ _file.name,
426
+ file_path,
427
+ )
353
428
  # Re-save the file from the source to potentially fix it
354
429
  with file_path.open("rb") as f:
355
430
  django_file = File(f, name=Path(_file.name).name) # Use existing name if possible
356
- existing_pdf_file.file = django_file # type: ignore
357
- existing_pdf_file.save(update_fields=['file']) # Only update file field
431
+ existing_pdf_file.file = django_file
432
+ existing_pdf_file.save(update_fields=["file"]) # Only update file field
358
433
  else:
359
434
  pass
360
435
  # logger.debug("File for existing RawPdfFile %s already exists in storage.", pdf_hash)
361
436
  except Exception as e:
362
- logger.error("Error verifying/restoring file for existing record %s: %s", pdf_hash, e)
437
+ logger.error(
438
+ "Error verifying/restoring file for existing record %s: %s",
439
+ pdf_hash,
440
+ e,
441
+ )
363
442
 
364
443
  # Delete the source temp file if requested
365
444
  if delete_source:
@@ -396,7 +475,9 @@ class RawPdfFile(models.Model):
396
475
  _file = raw_pdf.file
397
476
  assert _file is not None
398
477
  logger.info(
399
- "Created and saved new RawPdfFile %s with file %s", raw_pdf.pk, _file.name
478
+ "Created and saved new RawPdfFile %s with file %s",
479
+ raw_pdf.pk,
480
+ _file.name,
400
481
  )
401
482
 
402
483
  if not _file.storage.exists(_file.name):
@@ -404,9 +485,7 @@ class RawPdfFile(models.Model):
404
485
  "File was not saved correctly to storage path %s after model save.",
405
486
  _file.name,
406
487
  )
407
- raise IOError(
408
- f"File not found at expected storage path after save: {_file.name}"
409
- )
488
+ raise IOError(f"File not found at expected storage path after save: {_file.name}")
410
489
 
411
490
  try:
412
491
  logger.info("File saved to absolute path: %s", _file.path)
@@ -428,7 +507,7 @@ class RawPdfFile(models.Model):
428
507
  except OSError as e:
429
508
  logger.error("Error deleting source file %s: %s", file_path, e)
430
509
 
431
- # raw_pdf.save() # unnecessary?
510
+ # raw_pdf.save() # unnecessary?
432
511
  return raw_pdf
433
512
 
434
513
  def save(self, *args, **kwargs):
@@ -436,26 +515,20 @@ class RawPdfFile(models.Model):
436
515
  # This is primarily a fallback if instance created manually without using create_from_file
437
516
  """
438
517
  Saves the RawPdfFile instance, ensuring the PDF hash is set and related fields are derived from metadata.
439
-
518
+
440
519
  If the PDF hash is missing, attempts to calculate it from the file before saving. Validates that the file has a `.pdf` extension. If related fields such as patient, examination, center, or examiner are unset but available in the associated sensitive metadata, they are populated accordingly before saving.
441
520
  """
442
521
  if not self.pk and not self.pdf_hash and self.file:
443
522
  try:
444
- file_path = Path(self.file.path).resolve()
445
- if not file_path.exists():
446
- raise FileNotFoundError(f"File path does not exist: {file_path}")
447
- # Read from the file object before it's saved by storage
448
- self.file.open('rb') # Ensure file is open
449
- self.file.seek(0) # Go to beginning
450
- self.pdf_hash = get_pdf_hash(file_path) # Assuming get_pdf_hash can handle file obj
451
- self.file.seek(0) # Reset position
452
- self.file.close() # Close after reading
453
- logger.info(f"Calculated hash during pre-save for {self.file.name}")
454
- except Exception as e:
455
- logger.warning("Could not calculate hash before initial save for %s: %s", self.file.name, e)
456
- # Ensure file is closed if opened
457
- if hasattr(self.file, 'closed') and not self.file.closed:
458
- self.file.close()
523
+ with ensure_local_file(self.file) as local_path:
524
+ self.pdf_hash = get_pdf_hash(local_path)
525
+ logger.info("Calculated hash during pre-save for %s", self.file.name)
526
+ except Exception as exc:
527
+ logger.warning(
528
+ "Could not calculate hash before initial save for %s: %s",
529
+ self.file.name,
530
+ exc,
531
+ )
459
532
 
460
533
  if self.file and not self.file.name.endswith(".pdf"):
461
534
  raise ValidationError("Only PDF files are allowed")
@@ -463,18 +536,17 @@ class RawPdfFile(models.Model):
463
536
  # If hash is still missing after potential creation logic (e.g., direct instantiation)
464
537
  # and the file exists in storage, try calculating it from storage path.
465
538
  # This is less ideal as it requires the file to be saved first.
466
- if not self.pdf_hash and self.pk and self.file and self.file.storage.exists(self.file.name):
539
+ if not self.pdf_hash and self.pk and self.file and file_exists(self.file):
467
540
  try:
468
- file_path = Path(self.file.path).resolve()
469
- if not file_path.exists():
470
- raise FileNotFoundError(f"File path does not exist: {file_path}")
471
- logger.warning(f"Hash missing for saved file {self.file.name}. Recalculating.")
472
- with self.file.storage.open(self.file.name, 'rb') as f:
473
- self.pdf_hash = get_pdf_hash(file_path) # Assuming get_pdf_hash handles file obj
474
- # No need to save again just for hash unless update_fields is used carefully
475
- # Let the main super().save() handle saving the hash if it changed
476
- except Exception as e:
477
- logger.error("Could not calculate hash during save for existing file %s: %s", self.file.name, e)
541
+ with ensure_local_file(self.file) as local_path:
542
+ logger.warning("Hash missing for saved file %s. Recalculating.", self.file.name)
543
+ self.pdf_hash = get_pdf_hash(local_path)
544
+ except Exception as exc:
545
+ logger.error(
546
+ "Could not calculate hash during save for existing file %s: %s",
547
+ self.file.name,
548
+ exc,
549
+ )
478
550
 
479
551
  # Derive related fields from sensitive_meta if available
480
552
  if not self.patient and self.sensitive_meta:
@@ -483,7 +555,7 @@ class RawPdfFile(models.Model):
483
555
  self.examination = self.sensitive_meta.pseudo_examination
484
556
  if not self.center and self.sensitive_meta:
485
557
  self.center = self.sensitive_meta.center
486
- #TODO Outdated?
558
+ # TODO Outdated?
487
559
  # if not self.examiner and self.sensitive_meta and hasattr(self.sensitive_meta, 'pseudo_examiner'):
488
560
  # self.examiner = self.sensitive_meta.pseudo_examiner
489
561
 
@@ -492,7 +564,7 @@ class RawPdfFile(models.Model):
492
564
  def get_or_create_state(self) -> "RawPdfState":
493
565
  """
494
566
  Retrieve the associated RawPdfState for this RawPdfFile, creating and linking a new one if none exists.
495
-
567
+
496
568
  Returns:
497
569
  RawPdfState: The existing or newly created RawPdfState instance linked to this RawPdfFile.
498
570
  """
@@ -514,7 +586,7 @@ class RawPdfFile(models.Model):
514
586
  # Ensure fallback_file is a Path object.
515
587
  """
516
588
  Checks if the stored PDF file exists in storage and attempts to restore it from a fallback file path if missing.
517
-
589
+
518
590
  Parameters:
519
591
  fallback_file: Path or string representing the fallback file location to restore from if the stored file is missing.
520
592
  """
@@ -534,7 +606,6 @@ class RawPdfFile(models.Model):
534
606
  else:
535
607
  logger.error(f"Fallback file {fallback_file} does not exist.")
536
608
  except Exception as e:
537
-
538
609
  logger.error(f"Error during verify_existing_file for {_file.name}: {e}")
539
610
 
540
611
  def process_file(self, text, anonymized_text, report_meta, verbose):
@@ -562,18 +633,19 @@ class RawPdfFile(models.Model):
562
633
  for key, value in serializable_report_meta.items():
563
634
  if isinstance(value, (datetime, date)):
564
635
  serializable_report_meta[key] = value.isoformat()
565
-
566
- self.raw_meta = serializable_report_meta # Assign the version with string dates
567
636
 
568
- sensitive_meta.save() # Save SensitiveMeta first
569
- self.save() # Then save RawPdfFile
637
+ self.raw_meta = serializable_report_meta # Assign the version with string dates
638
+
639
+ sensitive_meta.save() # Save SensitiveMeta first
640
+ self.save() # Then save RawPdfFile
570
641
 
571
642
  return text, anonymized_text, report_meta
572
643
 
573
644
  def get_report_reader_config(self):
645
+ from warnings import warn
646
+
574
647
  from ...administration import Center
575
648
  from ...metadata.pdf_meta import PdfType
576
- from warnings import warn
577
649
 
578
650
  _center = self.center
579
651
  assert _center is not None, "Center must be set to get report reader config"
@@ -604,10 +676,10 @@ class RawPdfFile(models.Model):
604
676
  }
605
677
 
606
678
  return settings_dict
607
-
679
+
608
680
  @staticmethod
609
681
  def get_pdf_by_id(pdf_id: int) -> "RawPdfFile":
610
682
  try:
611
683
  return RawPdfFile.objects.get(pk=pdf_id)
612
684
  except RawPdfFile.DoesNotExist:
613
- raise ValueError(f"PDF with ID {pdf_id} does not exist.")
685
+ raise ValueError(f"PDF with ID {pdf_id} does not exist.")