endoreg-db 0.8.8.0__py3-none-any.whl → 0.8.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (402) hide show
  1. endoreg_db/data/__init__.py +22 -8
  2. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +0 -1
  3. endoreg_db/data/examination/examinations/data.yaml +114 -14
  4. endoreg_db/data/examination/time-type/data.yaml +0 -3
  5. endoreg_db/data/examination_indication/endoscopy.yaml +108 -173
  6. endoreg_db/data/examination_indication_classification/endoscopy.yaml +0 -70
  7. endoreg_db/data/examination_indication_classification_choice/endoscopy.yaml +33 -37
  8. endoreg_db/data/finding/00_generic.yaml +35 -0
  9. endoreg_db/data/finding/00_generic_complication.yaml +9 -0
  10. endoreg_db/data/finding/01_gastroscopy_baseline.yaml +88 -0
  11. endoreg_db/data/finding/01_gastroscopy_observation.yaml +113 -0
  12. endoreg_db/data/finding/02_colonoscopy_baseline.yaml +53 -0
  13. endoreg_db/data/finding/02_colonoscopy_hidden.yaml +119 -0
  14. endoreg_db/data/finding/02_colonoscopy_observation.yaml +152 -0
  15. endoreg_db/data/finding_classification/00_generic.yaml +44 -0
  16. endoreg_db/data/finding_classification/00_generic_histology.yaml +28 -0
  17. endoreg_db/data/finding_classification/00_generic_lesion.yaml +52 -0
  18. endoreg_db/data/finding_classification/{colonoscopy_bowel_preparation.yaml → 02_colonoscopy_baseline.yaml} +35 -20
  19. endoreg_db/data/finding_classification/02_colonoscopy_histology.yaml +13 -0
  20. endoreg_db/data/finding_classification/02_colonoscopy_other.yaml +12 -0
  21. endoreg_db/data/finding_classification/02_colonoscopy_polyp.yaml +101 -0
  22. endoreg_db/data/finding_classification_choice/{yes_no_na.yaml → 00_generic.yaml} +5 -1
  23. endoreg_db/data/finding_classification_choice/{examination_setting_generic_types.yaml → 00_generic_baseline.yaml} +10 -2
  24. endoreg_db/data/finding_classification_choice/{complication_generic_types.yaml → 00_generic_complication.yaml} +1 -1
  25. endoreg_db/data/finding_classification_choice/{histology.yaml → 00_generic_histology.yaml} +1 -4
  26. endoreg_db/data/finding_classification_choice/00_generic_lesion.yaml +158 -0
  27. endoreg_db/data/finding_classification_choice/{bowel_preparation.yaml → 02_colonoscopy_bowel_preparation.yaml} +1 -30
  28. endoreg_db/data/{_examples/finding_classification_choice/colonoscopy_not_complete_reason.yaml → finding_classification_choice/02_colonoscopy_generic.yaml} +1 -1
  29. endoreg_db/data/finding_classification_choice/{histology_polyp.yaml → 02_colonoscopy_histology.yaml} +1 -1
  30. endoreg_db/data/{_examples/finding_classification_choice/colonoscopy_location.yaml → finding_classification_choice/02_colonoscopy_location.yaml} +23 -4
  31. endoreg_db/data/finding_classification_choice/02_colonoscopy_other.yaml +34 -0
  32. endoreg_db/data/finding_classification_choice/02_colonoscopy_polyp_advanced_imaging.yaml +76 -0
  33. endoreg_db/data/{_examples/finding_classification_choice/colon_lesion_paris.yaml → finding_classification_choice/02_colonoscopy_polyp_morphology.yaml} +26 -8
  34. endoreg_db/data/finding_classification_choice/02_colonoscopy_size.yaml +27 -0
  35. endoreg_db/data/finding_classification_type/{colonoscopy_basic.yaml → 00_generic.yaml} +18 -13
  36. endoreg_db/data/finding_classification_type/02_colonoscopy.yaml +9 -0
  37. endoreg_db/data/finding_intervention/00_generic_endoscopy.yaml +59 -0
  38. endoreg_db/data/finding_intervention/00_generic_endoscopy_ablation.yaml +44 -0
  39. endoreg_db/data/finding_intervention/00_generic_endoscopy_bleeding.yaml +55 -0
  40. endoreg_db/data/finding_intervention/00_generic_endoscopy_resection.yaml +85 -0
  41. endoreg_db/data/finding_intervention/00_generic_endoscopy_stenosis.yaml +17 -0
  42. endoreg_db/data/finding_intervention/00_generic_endoscopy_stent.yaml +9 -0
  43. endoreg_db/data/finding_intervention/01_gastroscopy.yaml +19 -0
  44. endoreg_db/data/finding_intervention/04_eus.yaml +39 -0
  45. endoreg_db/data/finding_intervention/05_ercp.yaml +3 -0
  46. endoreg_db/data/finding_type/data.yaml +8 -12
  47. endoreg_db/data/requirement/01_patient_data.yaml +93 -0
  48. endoreg_db/data/requirement_operator/new_operators.yaml +36 -0
  49. endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +0 -2
  50. endoreg_db/data/requirement_set/90_coloreg.yaml +20 -8
  51. endoreg_db/exceptions.py +0 -1
  52. endoreg_db/forms/examination_form.py +1 -1
  53. endoreg_db/helpers/data_loader.py +124 -52
  54. endoreg_db/helpers/default_objects.py +116 -81
  55. endoreg_db/import_files/__init__.py +27 -0
  56. endoreg_db/import_files/context/__init__.py +7 -0
  57. endoreg_db/import_files/context/default_sensitive_meta.py +81 -0
  58. endoreg_db/import_files/context/ensure_center.py +17 -0
  59. endoreg_db/import_files/context/file_lock.py +66 -0
  60. endoreg_db/import_files/context/import_context.py +43 -0
  61. endoreg_db/import_files/context/validate_directories.py +56 -0
  62. endoreg_db/import_files/file_storage/__init__.py +15 -0
  63. endoreg_db/import_files/file_storage/create_report_file.py +76 -0
  64. endoreg_db/import_files/file_storage/create_video_file.py +75 -0
  65. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +39 -0
  66. endoreg_db/import_files/file_storage/state_management.py +496 -0
  67. endoreg_db/import_files/file_storage/storage.py +36 -0
  68. endoreg_db/import_files/import_service.md +26 -0
  69. endoreg_db/import_files/processing/__init__.py +11 -0
  70. endoreg_db/import_files/processing/report_processing/report_anonymization.py +94 -0
  71. endoreg_db/import_files/processing/sensitive_meta_adapter.py +51 -0
  72. endoreg_db/import_files/processing/video_processing/video_anonymization.py +107 -0
  73. endoreg_db/import_files/pseudonymization/fake.py +52 -0
  74. endoreg_db/import_files/pseudonymization/k_anonymity.py +182 -0
  75. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +128 -0
  76. endoreg_db/import_files/pseudonymization/pseudonymize.py +0 -0
  77. endoreg_db/import_files/report_import_service.py +141 -0
  78. endoreg_db/import_files/video_import_service.py +150 -0
  79. endoreg_db/management/commands/import_report.py +130 -65
  80. endoreg_db/management/commands/import_video_with_classification.py +1 -1
  81. endoreg_db/management/commands/load_ai_model_data.py +5 -5
  82. endoreg_db/management/commands/load_ai_model_label_data.py +9 -7
  83. endoreg_db/management/commands/load_base_db_data.py +5 -134
  84. endoreg_db/management/commands/load_contraindication_data.py +14 -16
  85. endoreg_db/management/commands/load_disease_classification_choices_data.py +15 -18
  86. endoreg_db/management/commands/load_disease_classification_data.py +15 -18
  87. endoreg_db/management/commands/load_disease_data.py +25 -28
  88. endoreg_db/management/commands/load_endoscope_data.py +20 -27
  89. endoreg_db/management/commands/load_event_data.py +14 -16
  90. endoreg_db/management/commands/load_examination_data.py +31 -44
  91. endoreg_db/management/commands/load_examination_indication_data.py +20 -21
  92. endoreg_db/management/commands/load_finding_data.py +52 -80
  93. endoreg_db/management/commands/load_information_source.py +21 -23
  94. endoreg_db/management/commands/load_lab_value_data.py +17 -26
  95. endoreg_db/management/commands/load_medication_data.py +13 -12
  96. endoreg_db/management/commands/load_organ_data.py +15 -19
  97. endoreg_db/management/commands/load_pdf_type_data.py +19 -18
  98. endoreg_db/management/commands/load_profession_data.py +14 -17
  99. endoreg_db/management/commands/load_qualification_data.py +20 -23
  100. endoreg_db/management/commands/load_report_reader_flag_data.py +17 -19
  101. endoreg_db/management/commands/load_requirement_data.py +14 -20
  102. endoreg_db/management/commands/load_risk_data.py +7 -6
  103. endoreg_db/management/commands/load_shift_data.py +20 -23
  104. endoreg_db/management/commands/load_tag_data.py +8 -11
  105. endoreg_db/management/commands/load_unit_data.py +17 -19
  106. endoreg_db/management/commands/start_filewatcher.py +46 -37
  107. endoreg_db/management/commands/validate_video_files.py +1 -5
  108. endoreg_db/migrations/0001_initial.py +1360 -1812
  109. endoreg_db/models/administration/person/patient/patient.py +72 -46
  110. endoreg_db/models/label/__init__.py +2 -2
  111. endoreg_db/models/label/annotation/video_segmentation_annotation.py +18 -26
  112. endoreg_db/models/label/label_video_segment/label_video_segment.py +23 -1
  113. endoreg_db/models/media/pdf/raw_pdf.py +136 -64
  114. endoreg_db/models/media/pdf/report_reader/report_reader_config.py +34 -10
  115. endoreg_db/models/media/processing_history/__init__.py +5 -0
  116. endoreg_db/models/media/processing_history/processing_history.py +96 -0
  117. endoreg_db/models/media/video/create_from_file.py +101 -31
  118. endoreg_db/models/media/video/video_file.py +125 -105
  119. endoreg_db/models/media/video/video_file_io.py +31 -26
  120. endoreg_db/models/medical/contraindication/README.md +1 -0
  121. endoreg_db/models/medical/examination/examination.py +28 -8
  122. endoreg_db/models/medical/examination/examination_indication.py +13 -79
  123. endoreg_db/models/medical/examination/examination_time.py +8 -3
  124. endoreg_db/models/medical/finding/finding.py +5 -12
  125. endoreg_db/models/medical/finding/finding_classification.py +18 -37
  126. endoreg_db/models/medical/finding/finding_intervention.py +7 -9
  127. endoreg_db/models/medical/hardware/endoscope.py +6 -0
  128. endoreg_db/models/medical/patient/medication_examples.py +5 -1
  129. endoreg_db/models/medical/patient/patient_finding.py +1 -1
  130. endoreg_db/models/metadata/pdf_meta.py +22 -10
  131. endoreg_db/models/metadata/sensitive_meta.py +3 -0
  132. endoreg_db/models/metadata/sensitive_meta_logic.py +200 -124
  133. endoreg_db/models/other/information_source.py +27 -6
  134. endoreg_db/models/report/__init__.py +0 -0
  135. endoreg_db/models/report/images.py +0 -0
  136. endoreg_db/models/report/report.py +6 -0
  137. endoreg_db/models/requirement/requirement.py +59 -399
  138. endoreg_db/models/requirement/requirement_operator.py +86 -98
  139. endoreg_db/models/state/audit_ledger.py +4 -5
  140. endoreg_db/models/state/raw_pdf.py +69 -30
  141. endoreg_db/models/state/video.py +65 -49
  142. endoreg_db/models/upload_job.py +33 -9
  143. endoreg_db/models/utils.py +27 -23
  144. endoreg_db/queries/__init__.py +3 -1
  145. endoreg_db/schemas/examination_evaluation.py +1 -1
  146. endoreg_db/serializers/__init__.py +2 -8
  147. endoreg_db/serializers/label_video_segment/label_video_segment.py +2 -29
  148. endoreg_db/serializers/meta/__init__.py +1 -6
  149. endoreg_db/serializers/misc/sensitive_patient_data.py +50 -26
  150. endoreg_db/serializers/patient_examination/patient_examination.py +3 -3
  151. endoreg_db/serializers/pdf/anony_text_validation.py +39 -23
  152. endoreg_db/serializers/video/video_file_list.py +65 -34
  153. endoreg_db/services/__old/pdf_import.py +1487 -0
  154. endoreg_db/services/__old/video_import.py +1306 -0
  155. endoreg_db/services/anonymization.py +63 -26
  156. endoreg_db/services/lookup_service.py +28 -28
  157. endoreg_db/services/lookup_store.py +2 -2
  158. endoreg_db/services/pdf_import.py +0 -1480
  159. endoreg_db/services/report_import.py +10 -0
  160. endoreg_db/services/video_import.py +6 -1165
  161. endoreg_db/tasks/upload_tasks.py +79 -70
  162. endoreg_db/tasks/video_ingest.py +8 -4
  163. endoreg_db/urls/__init__.py +0 -14
  164. endoreg_db/urls/ai.py +32 -0
  165. endoreg_db/urls/media.py +21 -24
  166. endoreg_db/utils/dataloader.py +87 -57
  167. endoreg_db/utils/paths.py +110 -46
  168. endoreg_db/utils/pipelines/Readme.md +1 -1
  169. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +97 -0
  170. endoreg_db/utils/video/ffmpeg_wrapper.py +217 -52
  171. endoreg_db/views/__init__.py +85 -173
  172. endoreg_db/views/ai/__init__.py +8 -0
  173. endoreg_db/views/ai/label.py +155 -0
  174. endoreg_db/views/anonymization/media_management.py +8 -7
  175. endoreg_db/views/anonymization/overview.py +97 -68
  176. endoreg_db/views/anonymization/validate.py +25 -21
  177. endoreg_db/views/media/__init__.py +5 -20
  178. endoreg_db/views/media/pdf_media.py +109 -65
  179. endoreg_db/views/media/sensitive_metadata.py +163 -148
  180. endoreg_db/views/meta/__init__.py +0 -8
  181. endoreg_db/views/misc/__init__.py +1 -7
  182. endoreg_db/views/misc/upload_views.py +94 -93
  183. endoreg_db/views/report/__init__.py +7 -0
  184. endoreg_db/views/{pdf → report}/reimport.py +45 -24
  185. endoreg_db/views/{pdf/pdf_stream.py → report/report_stream.py} +40 -32
  186. endoreg_db/views/requirement/lookup_store.py +22 -90
  187. endoreg_db/views/video/__init__.py +23 -22
  188. endoreg_db/views/video/correction.py +201 -172
  189. endoreg_db/views/video/reimport.py +1 -1
  190. endoreg_db/views/{media/video_segments.py → video/segments_crud.py} +75 -37
  191. endoreg_db/views/video/{video_meta.py → video_meta_stats.py} +2 -2
  192. endoreg_db/views/video/video_stream.py +7 -8
  193. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/METADATA +2 -2
  194. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/RECORD +217 -335
  195. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/WHEEL +1 -1
  196. endoreg_db/data/_examples/disease.yaml +0 -55
  197. endoreg_db/data/_examples/disease_classification.yaml +0 -13
  198. endoreg_db/data/_examples/disease_classification_choice.yaml +0 -62
  199. endoreg_db/data/_examples/event.yaml +0 -64
  200. endoreg_db/data/_examples/examination.yaml +0 -72
  201. endoreg_db/data/_examples/finding/anatomy_colon.yaml +0 -128
  202. endoreg_db/data/_examples/finding/colonoscopy.yaml +0 -40
  203. endoreg_db/data/_examples/finding/colonoscopy_bowel_prep.yaml +0 -56
  204. endoreg_db/data/_examples/finding/complication.yaml +0 -16
  205. endoreg_db/data/_examples/finding/data.yaml +0 -105
  206. endoreg_db/data/_examples/finding/examination_setting.yaml +0 -16
  207. endoreg_db/data/_examples/finding/medication_related.yaml +0 -18
  208. endoreg_db/data/_examples/finding/outcome.yaml +0 -12
  209. endoreg_db/data/_examples/finding_classification/colonoscopy_bowel_preparation.yaml +0 -68
  210. endoreg_db/data/_examples/finding_classification/colonoscopy_jnet.yaml +0 -22
  211. endoreg_db/data/_examples/finding_classification/colonoscopy_kudo.yaml +0 -25
  212. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_circularity.yaml +0 -20
  213. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_planarity.yaml +0 -24
  214. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_size.yaml +0 -68
  215. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_surface.yaml +0 -20
  216. endoreg_db/data/_examples/finding_classification/colonoscopy_location.yaml +0 -80
  217. endoreg_db/data/_examples/finding_classification/colonoscopy_lst.yaml +0 -21
  218. endoreg_db/data/_examples/finding_classification/colonoscopy_nice.yaml +0 -20
  219. endoreg_db/data/_examples/finding_classification/colonoscopy_paris.yaml +0 -26
  220. endoreg_db/data/_examples/finding_classification/colonoscopy_sano.yaml +0 -22
  221. endoreg_db/data/_examples/finding_classification/colonoscopy_summary.yaml +0 -53
  222. endoreg_db/data/_examples/finding_classification/complication_generic.yaml +0 -25
  223. endoreg_db/data/_examples/finding_classification/examination_setting_generic.yaml +0 -40
  224. endoreg_db/data/_examples/finding_classification/histology_colo.yaml +0 -51
  225. endoreg_db/data/_examples/finding_classification/intervention_required.yaml +0 -26
  226. endoreg_db/data/_examples/finding_classification/medication_related.yaml +0 -23
  227. endoreg_db/data/_examples/finding_classification/visualized.yaml +0 -33
  228. endoreg_db/data/_examples/finding_classification_choice/bowel_preparation.yaml +0 -78
  229. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_circularity_default.yaml +0 -32
  230. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_jnet.yaml +0 -15
  231. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_kudo.yaml +0 -23
  232. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_lst.yaml +0 -15
  233. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_nice.yaml +0 -17
  234. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_planarity_default.yaml +0 -49
  235. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_sano.yaml +0 -14
  236. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_surface_intact_default.yaml +0 -36
  237. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_size.yaml +0 -82
  238. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +0 -15
  239. endoreg_db/data/_examples/finding_classification_choice/complication_generic_types.yaml +0 -15
  240. endoreg_db/data/_examples/finding_classification_choice/examination_setting_generic_types.yaml +0 -15
  241. endoreg_db/data/_examples/finding_classification_choice/histology.yaml +0 -24
  242. endoreg_db/data/_examples/finding_classification_choice/histology_polyp.yaml +0 -20
  243. endoreg_db/data/_examples/finding_classification_choice/outcome.yaml +0 -19
  244. endoreg_db/data/_examples/finding_classification_choice/yes_no_na.yaml +0 -11
  245. endoreg_db/data/_examples/finding_classification_type/colonoscopy_basic.yaml +0 -48
  246. endoreg_db/data/_examples/finding_intervention/endoscopy.yaml +0 -43
  247. endoreg_db/data/_examples/finding_intervention/endoscopy_colonoscopy.yaml +0 -168
  248. endoreg_db/data/_examples/finding_intervention/endoscopy_egd.yaml +0 -128
  249. endoreg_db/data/_examples/finding_intervention/endoscopy_ercp.yaml +0 -32
  250. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_lower.yaml +0 -9
  251. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_upper.yaml +0 -36
  252. endoreg_db/data/_examples/finding_intervention_type/endoscopy.yaml +0 -15
  253. endoreg_db/data/_examples/finding_type/data.yaml +0 -43
  254. endoreg_db/data/_examples/requirement/age.yaml +0 -26
  255. endoreg_db/data/_examples/requirement/gender.yaml +0 -25
  256. endoreg_db/data/_examples/requirement_set/01_endoscopy_generic.yaml +0 -48
  257. endoreg_db/data/_examples/requirement_set/colonoscopy_austria_screening.yaml +0 -57
  258. endoreg_db/data/_examples/requirement_set/endoscopy_bleeding_risk.yaml +0 -52
  259. endoreg_db/data/_examples/yaml_examples.xlsx +0 -0
  260. endoreg_db/data/finding/anatomy_colon.yaml +0 -128
  261. endoreg_db/data/finding/colonoscopy.yaml +0 -40
  262. endoreg_db/data/finding/colonoscopy_bowel_prep.yaml +0 -56
  263. endoreg_db/data/finding/complication.yaml +0 -16
  264. endoreg_db/data/finding/data.yaml +0 -105
  265. endoreg_db/data/finding/examination_setting.yaml +0 -16
  266. endoreg_db/data/finding/medication_related.yaml +0 -18
  267. endoreg_db/data/finding/outcome.yaml +0 -12
  268. endoreg_db/data/finding_classification/colonoscopy_jnet.yaml +0 -22
  269. endoreg_db/data/finding_classification/colonoscopy_kudo.yaml +0 -25
  270. endoreg_db/data/finding_classification/colonoscopy_lesion_circularity.yaml +0 -20
  271. endoreg_db/data/finding_classification/colonoscopy_lesion_planarity.yaml +0 -24
  272. endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +0 -38
  273. endoreg_db/data/finding_classification/colonoscopy_lesion_surface.yaml +0 -20
  274. endoreg_db/data/finding_classification/colonoscopy_location.yaml +0 -49
  275. endoreg_db/data/finding_classification/colonoscopy_lst.yaml +0 -21
  276. endoreg_db/data/finding_classification/colonoscopy_nice.yaml +0 -20
  277. endoreg_db/data/finding_classification/colonoscopy_paris.yaml +0 -26
  278. endoreg_db/data/finding_classification/colonoscopy_sano.yaml +0 -22
  279. endoreg_db/data/finding_classification/colonoscopy_summary.yaml +0 -53
  280. endoreg_db/data/finding_classification/complication_generic.yaml +0 -25
  281. endoreg_db/data/finding_classification/examination_setting_generic.yaml +0 -40
  282. endoreg_db/data/finding_classification/histology_colo.yaml +0 -43
  283. endoreg_db/data/finding_classification/intervention_required.yaml +0 -26
  284. endoreg_db/data/finding_classification/medication_related.yaml +0 -23
  285. endoreg_db/data/finding_classification/visualized.yaml +0 -33
  286. endoreg_db/data/finding_classification_choice/colon_lesion_circularity_default.yaml +0 -32
  287. endoreg_db/data/finding_classification_choice/colon_lesion_jnet.yaml +0 -15
  288. endoreg_db/data/finding_classification_choice/colon_lesion_kudo.yaml +0 -23
  289. endoreg_db/data/finding_classification_choice/colon_lesion_lst.yaml +0 -15
  290. endoreg_db/data/finding_classification_choice/colon_lesion_nice.yaml +0 -17
  291. endoreg_db/data/finding_classification_choice/colon_lesion_paris.yaml +0 -57
  292. endoreg_db/data/finding_classification_choice/colon_lesion_planarity_default.yaml +0 -49
  293. endoreg_db/data/finding_classification_choice/colon_lesion_sano.yaml +0 -14
  294. endoreg_db/data/finding_classification_choice/colon_lesion_surface_intact_default.yaml +0 -36
  295. endoreg_db/data/finding_classification_choice/colonoscopy_location.yaml +0 -229
  296. endoreg_db/data/finding_classification_choice/colonoscopy_not_complete_reason.yaml +0 -19
  297. endoreg_db/data/finding_classification_choice/colonoscopy_size.yaml +0 -82
  298. endoreg_db/data/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +0 -15
  299. endoreg_db/data/finding_classification_choice/outcome.yaml +0 -19
  300. endoreg_db/data/finding_intervention/endoscopy.yaml +0 -43
  301. endoreg_db/data/finding_intervention/endoscopy_colonoscopy.yaml +0 -168
  302. endoreg_db/data/finding_intervention/endoscopy_egd.yaml +0 -128
  303. endoreg_db/data/finding_intervention/endoscopy_ercp.yaml +0 -32
  304. endoreg_db/data/finding_intervention/endoscopy_eus_lower.yaml +0 -9
  305. endoreg_db/data/finding_intervention/endoscopy_eus_upper.yaml +0 -36
  306. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +0 -79
  307. endoreg_db/data/requirement/age.yaml +0 -26
  308. endoreg_db/data/requirement/colonoscopy_baseline_austria.yaml +0 -45
  309. endoreg_db/data/requirement/disease_cardiovascular.yaml +0 -79
  310. endoreg_db/data/requirement/disease_classification_choice_cardiovascular.yaml +0 -41
  311. endoreg_db/data/requirement/disease_hepatology.yaml +0 -12
  312. endoreg_db/data/requirement/disease_misc.yaml +0 -12
  313. endoreg_db/data/requirement/disease_renal.yaml +0 -96
  314. endoreg_db/data/requirement/endoscopy_bleeding_risk.yaml +0 -59
  315. endoreg_db/data/requirement/event_cardiology.yaml +0 -251
  316. endoreg_db/data/requirement/event_requirements.yaml +0 -145
  317. endoreg_db/data/requirement/finding_colon_polyp.yaml +0 -50
  318. endoreg_db/data/requirement/gender.yaml +0 -25
  319. endoreg_db/data/requirement/lab_value.yaml +0 -441
  320. endoreg_db/data/requirement/medication.yaml +0 -93
  321. endoreg_db/data/requirement_operator/age.yaml +0 -13
  322. endoreg_db/data/requirement_operator/lab_operators.yaml +0 -129
  323. endoreg_db/data/requirement_operator/model_operators.yaml +0 -96
  324. endoreg_db/management/commands/init_default_ai_model.py +0 -112
  325. endoreg_db/management/commands/reset_celery_schedule.py +0 -9
  326. endoreg_db/management/commands/validate_video.py +0 -204
  327. endoreg_db/migrations/0002_requirementset_depends_on.py +0 -18
  328. endoreg_db/migrations/_old/0001_initial.py +0 -1857
  329. endoreg_db/migrations/_old/0002_add_video_correction_models.py +0 -52
  330. endoreg_db/migrations/_old/0003_add_center_display_name.py +0 -30
  331. endoreg_db/migrations/_old/0004_employee_city_employee_post_code_employee_street_and_more.py +0 -68
  332. endoreg_db/migrations/_old/0004_remove_casetemplate_rules_and_more.py +0 -77
  333. endoreg_db/migrations/_old/0005_merge_20251111_1003.py +0 -14
  334. endoreg_db/migrations/_old/0006_sensitivemeta_anonymized_text_and_more.py +0 -68
  335. endoreg_db/migrations/_old/0007_remove_rule_attribute_dtype_remove_rule_rule_type_and_more.py +0 -89
  336. endoreg_db/migrations/_old/0008_remove_event_event_classification_and_more.py +0 -27
  337. endoreg_db/migrations/_old/0009_alter_modelmeta_options_and_more.py +0 -21
  338. endoreg_db/renames.yml +0 -8
  339. endoreg_db/serializers/_old/raw_pdf_meta_validation.py +0 -223
  340. endoreg_db/serializers/_old/raw_video_meta_validation.py +0 -179
  341. endoreg_db/serializers/_old/video.py +0 -71
  342. endoreg_db/serializers/meta/pdf_file_meta_extraction.py +0 -115
  343. endoreg_db/serializers/meta/report_meta.py +0 -53
  344. endoreg_db/serializers/report/__init__.py +0 -9
  345. endoreg_db/serializers/report/mixins.py +0 -45
  346. endoreg_db/serializers/report/report.py +0 -105
  347. endoreg_db/serializers/report/report_list.py +0 -22
  348. endoreg_db/serializers/report/secure_file_url.py +0 -26
  349. endoreg_db/services/requirements_object.py +0 -147
  350. endoreg_db/services/storage_aware_video_processor.py +0 -370
  351. endoreg_db/urls/files.py +0 -6
  352. endoreg_db/urls/label_video_segment_validate.py +0 -33
  353. endoreg_db/urls/label_video_segments.py +0 -46
  354. endoreg_db/views/label/__init__.py +0 -5
  355. endoreg_db/views/label/label.py +0 -15
  356. endoreg_db/views/label_video_segment/__init__.py +0 -16
  357. endoreg_db/views/label_video_segment/create_lvs_from_annotation.py +0 -44
  358. endoreg_db/views/label_video_segment/get_lvs_by_name_and_video.py +0 -50
  359. endoreg_db/views/label_video_segment/label_video_segment.py +0 -77
  360. endoreg_db/views/label_video_segment/label_video_segment_by_label.py +0 -174
  361. endoreg_db/views/label_video_segment/label_video_segment_detail.py +0 -73
  362. endoreg_db/views/label_video_segment/update_lvs_from_annotation.py +0 -46
  363. endoreg_db/views/label_video_segment/validate.py +0 -226
  364. endoreg_db/views/media/segments.py +0 -71
  365. endoreg_db/views/meta/available_files_list.py +0 -146
  366. endoreg_db/views/meta/report_meta.py +0 -53
  367. endoreg_db/views/meta/sensitive_meta_detail.py +0 -85
  368. endoreg_db/views/misc/secure_file_serving_view.py +0 -80
  369. endoreg_db/views/misc/secure_file_url_view.py +0 -84
  370. endoreg_db/views/misc/secure_url_validate.py +0 -79
  371. endoreg_db/views/patient_examination/DEPRECATED_video_backup.py +0 -164
  372. endoreg_db/views/patient_finding_location/__init__.py +0 -5
  373. endoreg_db/views/patient_finding_location/pfl_create.py +0 -70
  374. endoreg_db/views/patient_finding_morphology/__init__.py +0 -5
  375. endoreg_db/views/patient_finding_morphology/pfm_create.py +0 -70
  376. endoreg_db/views/pdf/__init__.py +0 -8
  377. endoreg_db/views/video/segmentation.py +0 -274
  378. endoreg_db/views/video/task_status.py +0 -49
  379. endoreg_db/views/video/timeline.py +0 -46
  380. endoreg_db/views/video/video_analyze.py +0 -52
  381. /endoreg_db/data/requirement/{colon_polyp_intervention.yaml → old/colon_polyp_intervention.yaml} +0 -0
  382. /endoreg_db/data/{_examples/requirement → requirement/old}/colonoscopy_baseline_austria.yaml +0 -0
  383. /endoreg_db/data/requirement/{coloreg_colon_polyp.yaml → old/coloreg_colon_polyp.yaml} +0 -0
  384. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_cardiovascular.yaml +0 -0
  385. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_classification_choice_cardiovascular.yaml +0 -0
  386. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_hepatology.yaml +0 -0
  387. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_misc.yaml +0 -0
  388. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_renal.yaml +0 -0
  389. /endoreg_db/data/{_examples/requirement → requirement/old}/endoscopy_bleeding_risk.yaml +0 -0
  390. /endoreg_db/data/{_examples/requirement → requirement/old}/event_cardiology.yaml +0 -0
  391. /endoreg_db/data/{_examples/requirement → requirement/old}/event_requirements.yaml +0 -0
  392. /endoreg_db/data/{_examples/requirement → requirement/old}/finding_colon_polyp.yaml +0 -0
  393. /endoreg_db/{urls/sensitive_meta.py → data/requirement/old/gender.yaml} +0 -0
  394. /endoreg_db/data/{_examples/requirement → requirement/old}/lab_value.yaml +0 -0
  395. /endoreg_db/data/{_examples/requirement → requirement/old}/medication.yaml +0 -0
  396. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/age.yaml +0 -0
  397. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/lab_operators.yaml +0 -0
  398. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/model_operators.yaml +0 -0
  399. /endoreg_db/{views/pdf/pdf_stream_views.py → import_files/pseudonymization/__init__.py} +0 -0
  400. /endoreg_db/utils/requirement_operator_logic/{lab_value_operators.py → _old/lab_value_operators.py} +0 -0
  401. /endoreg_db/utils/requirement_operator_logic/{model_evaluators.py → _old/model_evaluators.py} +0 -0
  402. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,496 @@
1
+ from endoreg_db.models.media.processing_history.processing_history import ProcessingHistory
2
+ from endoreg_db.utils.paths import IMPORT_REPORT_DIR, IMPORT_VIDEO_DIR, ANONYM_REPORT_DIR, ANONYM_VIDEO_DIR
3
+
4
+ import os
5
+ import logging
6
+ import shutil
7
+ from pathlib import Path
8
+ from typing import Optional, Union
9
+
10
+ from django.db import transaction
11
+
12
+ from endoreg_db.import_files.context.import_context import ImportContext
13
+ from endoreg_db.models.media import RawPdfFile, VideoFile
14
+ from endoreg_db.models.state import RawPdfState, VideoState
15
+ from endoreg_db.utils import paths as path_utils
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def _ensure_instance_state(instance: Union[VideoFile, RawPdfFile]) -> Optional[Union[RawPdfState, VideoState]]:
21
+ """
22
+ Helper: ensure instance.state exists and return it.
23
+ Mirrors PdfImportService._ensure_state.
24
+ """
25
+ if isinstance(instance, RawPdfFile):
26
+ state = getattr(instance, "state", None)
27
+ else:
28
+ state = getattr(instance, "state", None)
29
+
30
+ if state is not None:
31
+ return state
32
+
33
+ if hasattr(instance, "get_or_create_state"):
34
+ state = instance.get_or_create_state()
35
+ instance.save()
36
+ return state
37
+
38
+ return None
39
+
40
def mark_instance_processing_started(
    instance: Union[RawPdfFile, VideoFile],
    ctx: ImportContext,
):
    """
    Flag the instance's state as "processing started" unless it already is.

    Args:
        instance: Report or video whose state should be updated.
        ctx: Import context; accepted for signature parity but not read here.
    """
    state = _ensure_instance_state(instance)

    with transaction.atomic():
        if state is None:
            return

        # In the old code, processing_started was set earlier; we guard here
        already_started = getattr(state, "processing_started", False)
        if already_started:
            return

        if hasattr(state, "mark_processing_started"):
            state.mark_processing_started()
53
+
54
+
55
def finalize_report_success(
    ctx: ImportContext,
) -> None:
    """
    Finalize a successful report import/anonymization.

    - Move anonymized report from temp to canonical anonymized dir
    - Update RawPdfFile.processed_file
    - Mark RawPdfState as anonymized + sensitive_meta_processed
    - Record a ProcessingHistory entry with success=True

    Args:
        ctx: Import context; ``ctx.current_report`` must be a saved RawPdfFile
            and ``ctx.anonymized_path`` should point at the anonymizer output.

    Returns:
        None. The function logs and returns early when the report in the
        context is missing, of the wrong type, or unsaved.
    """
    instance = ctx.current_report
    if not isinstance(instance, RawPdfFile):
        logger.warning("finalize_report_success called with non-RawPdfFile instance")
        return
    if not instance.pk:
        logger.warning("finalize_report_success called with unsaved instance")
        return

    # --- Move anonymized path into final storage (if we have one) ---
    final_path: Optional[Path] = None
    if ctx.anonymized_path is None:
        logger.warning(
            "No anonymized_path for instance %s (hash=%s); skipping file move.",
            instance.pk,
            getattr(instance, "pdf_hash", None),
        )
    else:
        pdf_hash = getattr(instance, "pdf_hash", None) or instance.pk
        expected_final_path = ANONYM_REPORT_DIR / f"{pdf_hash}.pdf"

        src = Path(ctx.anonymized_path)

        logger.debug(
            "finalize_report_success: src=%s (exists=%s, resolved=%s), expected_final=%s",
            src,
            src.exists(),
            src.resolve(),
            expected_final_path,
        )

        if src.resolve() == expected_final_path.resolve():
            # If anonymizer already wrote to the final path, don't move
            logger.info(
                "Anonymizer output already at final path %s; skipping move.",
                expected_final_path,
            )
            final_path = expected_final_path
        elif not src.exists():
            # Only move if the source actually exists
            logger.error(
                "Anonymized file %s does not exist; cannot move to %s",
                src,
                expected_final_path,
            )
        else:
            ANONYM_REPORT_DIR.mkdir(parents=True, exist_ok=True)
            if expected_final_path.exists():
                expected_final_path.unlink()
            shutil.move(str(src), str(expected_final_path))
            final_path = expected_final_path
            logger.info("Moved anonymized report to %s", final_path)

    # Update FileField if we have a final path. The name is derived from the
    # *final* location only: ctx.anonymized_path is the pre-move source and
    # must not be written into processed_file.
    if final_path is not None:
        try:
            relative_name = path_utils.to_storage_relative(final_path)
        except ValueError:
            # Fallback: absolute path if outside STORAGE_DIR
            # (assumes to_storage_relative signals this with ValueError -- confirm)
            relative_name = str(final_path)

        current_name = getattr(instance.processed_file, "name", None)
        if current_name != relative_name:
            instance.processed_file.name = relative_name
            logger.info("Updated processed_file to %s", relative_name)

    # --- Update RawPdfState flags (mirrors _finalize_processing) ---
    state = _ensure_instance_state(instance)

    with transaction.atomic():
        if state is not None:
            # In the old code, processing_started was set earlier; we guard here
            if not getattr(state, "processing_started", False) and hasattr(
                state, "mark_processing_started"
            ):
                state.mark_processing_started()

            # We consider text/meta extraction + anonymization done at this point
            if hasattr(state, "mark_anonymized"):
                state.mark_anonymized()
            if hasattr(state, "mark_sensitive_meta_processed"):
                state.mark_sensitive_meta_processed()

            # Only save when a state exists; _ensure_instance_state may return None.
            state.save()

        instance.save()

    # --- ProcessingHistory entry ---
    try:
        with transaction.atomic():
            ProcessingHistory.get_or_create_for_object(
                obj=instance,
                success=True,
            )
    except Exception as e:
        # Best-effort: a missing history row must not undo a finished import.
        logger.debug(
            "Saving not possible for report %s; skipping ProcessingHistory. Error: %s",
            instance.pk,
            e,
        )
178
+
179
def finalize_video_success(
    ctx: ImportContext,
) -> None:
    """
    Finalize a successful video import/anonymization.

    - Clean the transcoding directory
    - Move anonymized video from temp to canonical anonymized dir
    - Update VideoFile.processed_file
    - Mark VideoState as anonymized + sensitive_meta_processed
    - Record a ProcessingHistory entry with success=True

    Args:
        ctx: Import context; ``ctx.current_video`` must be a saved VideoFile
            and ``ctx.anonymized_path`` should point at the anonymizer output.

    Returns:
        None. The function logs and returns early when the video in the
        context is of the wrong type or unsaved.
    """
    # A failed cleanup of temporary transcoding files is reported but does not
    # abort finalization; an ``assert`` would vanish under ``python -O`` and
    # would otherwise turn leftover temp files into a failed import.
    if not nuke_transcoding_dir():
        logger.warning(
            "Transcoding directory cleanup returned False; there may be leftover files."
        )

    instance = ctx.current_video
    if not isinstance(instance, VideoFile):
        logger.warning("finalize_video_success called with non-VideoFile instance")
        return
    if not instance.pk:
        logger.warning("finalize_video_success called with unsaved instance")
        return

    # --- Move anonymized path into final storage (if we have one) ---
    final_path: Optional[Path] = None

    if ctx.anonymized_path is None:
        logger.warning(
            "No anonymized_path for video instance %s (hash=%s); skipping file move.",
            instance.pk,
            getattr(instance, "video_hash", None),
        )
    else:
        # Use a stable naming convention: <video_hash>.mp4
        video_hash = getattr(instance, "video_hash", None) or instance.pk
        expected_final_path = ANONYM_VIDEO_DIR / f"{video_hash}.mp4"

        src = Path(ctx.anonymized_path)

        logger.debug(
            "finalize_video_success: src=%s (exists=%s, resolved=%s), expected_final=%s",
            src,
            src.exists(),
            src.resolve(),
            expected_final_path,
        )

        # If anonymizer already wrote to the final path, don't move
        try:
            same_target = src.resolve() == expected_final_path.resolve()
        except FileNotFoundError:
            # src might not exist anymore
            same_target = False

        if same_target:
            logger.info(
                "Anonymizer output already at final video path %s; skipping move.",
                expected_final_path,
            )
            final_path = expected_final_path
        elif not src.exists():
            logger.error(
                "Anonymized video %s does not exist; cannot move to %s",
                src,
                expected_final_path,
            )
        else:
            ANONYM_VIDEO_DIR.mkdir(parents=True, exist_ok=True)
            if expected_final_path.exists():
                try:
                    expected_final_path.unlink()
                except Exception as e:
                    # The move below is still attempted after a failed unlink.
                    logger.warning(
                        "Could not remove existing anonymized video %s: %s",
                        expected_final_path,
                        e,
                    )
            shutil.move(str(src), str(expected_final_path))
            final_path = expected_final_path
            logger.info("Moved anonymized video to %s", final_path)

    # Update FileField if we have a final path
    if final_path is not None:
        relative_name = path_utils.to_storage_relative(final_path)
        current_name = getattr(instance.processed_file, "name", None)
        if current_name != relative_name:
            instance.processed_file.name = relative_name
            logger.info("Updated video processed_file to %s", relative_name)

    # --- Update VideoState flags (mirrors report) ---
    state = _ensure_instance_state(instance)

    with transaction.atomic():
        if state is not None:
            if not getattr(state, "processing_started", False) and hasattr(
                state, "mark_processing_started"
            ):
                state.mark_processing_started()

            if hasattr(state, "mark_anonymized"):
                state.mark_anonymized()
            if hasattr(state, "mark_sensitive_meta_processed"):
                state.mark_sensitive_meta_processed()

            # Only save when a state exists; _ensure_instance_state may return None.
            state.save()

        instance.save()

    # --- ProcessingHistory entry ---
    try:
        with transaction.atomic():
            ProcessingHistory.get_or_create_for_object(
                obj=instance,
                success=True,
            )
    except Exception as e:
        # Best-effort: a missing history row must not undo a finished import.
        logger.debug(
            "Saving not possible for video %s; skipping ProcessingHistory. Error: %s",
            instance.pk,
            e,
        )
302
+
303
+
304
def finalize_failure(
    ctx: ImportContext,
) -> None:
    """
    Finalize a failed instance import/anonymization.

    - Reset the instance state to "processing not started"
    - Clean up associated files (best-effort)
    - Record a ProcessingHistory entry with success=False (only when
      ``ctx.file_hash`` is set)

    Args:
        ctx: Import context. When ``ctx.instance`` is None it is filled from
            ``ctx.current_report`` or ``ctx.current_video``.

    Raises:
        RuntimeError: If the context holds neither a RawPdfFile report nor a
            VideoFile video to finalize.
    """
    if ctx.instance is None:
        if isinstance(ctx.current_report, RawPdfFile):
            ctx.instance = ctx.current_report
        elif isinstance(ctx.current_video, VideoFile):
            ctx.instance = ctx.current_video
        else:
            # RuntimeError subclasses Exception, so existing handlers still
            # catch it, but now they get a message explaining what is missing.
            raise RuntimeError(
                "finalize_failure called without an instance: ctx.instance is None "
                "and neither ctx.current_report nor ctx.current_video is set."
            )

    # Reset state flags similar to _mark_processing_incomplete / _cleanup_on_error
    state = _ensure_instance_state(ctx.instance)

    if state is not None:
        try:
            state.mark_processing_not_started()
            state.save()
            logger.info(
                "Reset instance state for failed processing (instance pk=%s)",
                ctx.instance.pk,
            )
        except Exception as e:
            logger.warning(
                "Failed to reset State for instance %s: %s",
                ctx.instance.pk,
                e,
            )

    try:
        delete_associated_files(ctx)
    except Exception as e:
        logger.warning(f"There might be files remaining. {e}")

    # History entry with success=False
    if ctx.file_hash:
        try:
            ProcessingHistory.get_or_create_for_object(
                obj=ctx.instance,
                success=False,
            )
        except Exception as e:
            # Guarded like the success paths: a history write error must not
            # hide the processing failure that brought us here.
            logger.warning(
                "Could not record failed ProcessingHistory for instance %s: %s",
                ctx.instance.pk,
                e,
            )
    else:
        logger.debug(
            "No file_hash in context for instance %s when finalizing failure; "
            "skipping ProcessingHistory.",
            ctx.instance.pk,
        )

    # NOTE(review): this message says "Report" although videos pass through
    # here as well; left unchanged in case log consumers match on it.
    logger.error(
        "Report processing failed for %s",
        ctx.file_path,
    )
361
+
362
def _remove_artefact(
    path: Union[str, Path, None],
    root_key: Optional[str],
    label: str,
) -> bool:
    """
    Best-effort removal of a single artefact; logs instead of raising.

    Args:
        path: Location to delete, as ``str`` or ``Path``. None means nothing to do.
        root_key: Key into ``path_utils.data_paths`` used to resolve a relative
            ``path`` that does not exist as given; None disables the lookup.
        label: Artefact name used in the log messages.

    Returns:
        True if nothing remains at the target location afterwards, else False.
    """
    if path is None:
        return True

    target = Path(path)
    if root_key is not None and not target.is_absolute() and not target.exists():
        # A bare relative name is looked up below the configured data root.
        try:
            rooted = Path(path_utils.data_paths[root_key]) / target
        except (KeyError, TypeError) as e:
            logger.debug("No data path for key %r: %s", root_key, e)
        else:
            if rooted.exists():
                target = rooted

    try:
        if target.is_dir() and not target.is_symlink():
            # rmdir only removes empty directories; anything else is logged below.
            target.rmdir()
            logger.info("Deleted %s file %s", label, target)
        elif target.exists() or target.is_symlink():
            target.unlink()
            logger.info("Deleted %s file %s", label, target)
    except OSError as e:
        logger.error("Error when unlinking %s path %s: %s", label, target, e, exc_info=True)

    return not (target.exists() or target.is_symlink())


def delete_associated_files(ctx: ImportContext) -> None:
    """
    Best-effort cleanup of anonymized, sensitive and transcoding artefacts.

    - Ensure ctx.original_path points to an existing import file; if not, try to restore
      from ctx.sensitive_path into the appropriate IMPORT_*_DIR.
    - Delete anonymized file (if any).
    - Nuke transcoding directory.
    - Delete sensitive file (if any).

    This function should *not* raise on non-critical cleanup errors; it logs instead.
    Only restoration of the original import file is treated as critical.

    Args:
        ctx: Import context whose ``original_path``, ``sensitive_path`` and
            ``anonymized_path`` are inspected. ``anonymized_path`` and
            ``sensitive_path`` are reset to None once their files are gone;
            ``original_path`` is updated when the original had to be restored.

    Raises:
        RuntimeError: If the original file is missing and no sensitive copy
            exists to restore it from.
        ValueError: If restoration is needed but ``ctx.file_type`` is neither
            "video" nor "report".
        Exception: Any error raised while copying the sensitive file back is
            logged and re-raised.
    """
    # --- 1. Restore original import file if needed (critical) ---
    original_missing = not isinstance(ctx.original_path, Path) or not ctx.original_path.exists()
    if original_missing:
        logger.warning(
            "Original file missing in ctx (file_type=%s); "
            "trying to restore from sensitive copy.",
            ctx.file_type,
        )

        if not isinstance(ctx.sensitive_path, Path) or not ctx.sensitive_path.exists():
            # This is serious: we lost both original and sensitive copy
            msg = (
                f"Cannot restore original file for {ctx.file_type}: "
                "sensitive copy missing as well."
            )
            logger.error(msg)
            raise RuntimeError(msg)

        try:
            if ctx.file_type == "video":
                target_dir = IMPORT_VIDEO_DIR
            elif ctx.file_type == "report":
                target_dir = IMPORT_REPORT_DIR
            else:
                raise ValueError(f"Unknown file_type in context: {ctx.file_type}")

            target_dir.mkdir(parents=True, exist_ok=True)
            restored_path = shutil.copy2(ctx.sensitive_path, target_dir)
            ctx.original_path = Path(restored_path)
            logger.info("Restored original file for %s to %s", ctx.file_type, ctx.original_path)
        except Exception as e:
            logger.error("Error during safety copy / restore of original file: %s", e, exc_info=True)
            raise

    # Which data roots apply to relative artefact paths depends on the media type.
    if isinstance(ctx.current_video, VideoFile):
        media = "video"
    elif isinstance(ctx.current_report, RawPdfFile):
        media = "report"
    else:
        media = None

    # --- 2. Delete anonymized file (best-effort) ---
    if ctx.anonymized_path is not None:
        anonym_key = f"anonym_{media}" if media else None
        if _remove_artefact(ctx.anonymized_path, anonym_key, "anonymized"):
            ctx.anonymized_path = None
        else:
            # Logged instead of raised so the steps below, in particular the
            # deletion of the sensitive file, still run. The path is kept on
            # the context so that a later cleanup can retry.
            logger.error("Anonym file remains after all deletion attempts.")

    # --- 3. Nuke transcoding directory (best-effort) ---
    if not nuke_transcoding_dir():
        logger.warning("Transcoding directory cleanup returned False; there may be leftover files.")

    # --- 4. Delete sensitive file (best-effort) ---
    if ctx.sensitive_path is not None:
        sensitive_key = f"sensitive_{media}" if media else None
        if _remove_artefact(ctx.sensitive_path, sensitive_key, "sensitive"):
            ctx.sensitive_path = None
        else:
            logger.error("Sensitive file remains after all deletion attempts.")
457
+
458
+
459
def nuke_transcoding_dir(
    transcoding_dir: Union[str, Path, None] = None
) -> bool:
    """
    Delete all files and subdirectories inside the transcoding directory.

    The directory itself is kept; only its entries are removed.

    Args:
        transcoding_dir: Directory to clean. When None, the path stored under
            ``path_utils.data_paths["transcoding"]`` is used.

    Returns:
        True if the directory does not exist, was empty, or every entry was
        removed. False if the path is not a directory, if at least one entry
        could not be removed, or if an unexpected error occurred (errors are
        logged).
    """
    try:
        if transcoding_dir is None:
            transcoding_dir = path_utils.data_paths["transcoding"]

        transcoding_dir = Path(transcoding_dir)

        if not transcoding_dir.exists():
            logger.info("Transcoding dir %s does not exist; nothing to clean.", transcoding_dir)
            return True

        if not transcoding_dir.is_dir():
            logger.error("Configured transcoding path %s is not a directory.", transcoding_dir)
            return False

        fully_cleaned = True
        for entry in transcoding_dir.iterdir():
            try:
                if entry.is_dir() and not entry.is_symlink():
                    shutil.rmtree(entry)
                else:
                    # Regular files, symlinks (also dangling or pointing at
                    # directories) and special files are all unlinked.
                    entry.unlink()
            except Exception as e:
                logger.warning("Failed to remove entry %s in transcoding dir: %s", entry, e)
                # Continue trying to delete other entries, but report the
                # leftover to the caller through the return value.
                fully_cleaned = False
        return fully_cleaned

    except Exception as e:
        logger.error("Unexpected error while nuking transcoding dir: %s", e, exc_info=True)
        return False
@@ -0,0 +1,36 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from endoreg_db.models.media.video.create_from_file import atomic_copy_with_fallback, atomic_move_with_fallback
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
def ensure_dir(path: Path) -> None:
    """Create ``path`` including missing parents; an existing directory is fine."""
    path.mkdir(exist_ok=True, parents=True)
9
+
10
+
11
def create_sensitive_copy(src: Path, sensitive_root: Path) -> Path:
    """
    Copy ``src`` into ``sensitive_root`` under its original file name.

    The target directory is created first if it does not exist.

    Returns:
        Path to the sensitive copy.
    """
    ensure_dir(sensitive_root)
    target = sensitive_root.joinpath(src.name)
    logger.info("Creating sensitive copy: %s -> %s", src, target)
    atomic_copy_with_fallback(src, target)
    return target
23
+
24
+
25
def move_to_anonymized(temp_path: Path, anonymized_root: Path) -> Path:
    """
    Move a (temporary) anonymized file into ``anonymized_root``, keeping its name.

    The target directory is created first if it does not exist.

    Returns:
        Final path inside anonymized_root.
    """
    ensure_dir(anonymized_root)
    target = anonymized_root.joinpath(temp_path.name)
    logger.info("Moving anonymized file: %s -> %s", temp_path, target)
    atomic_move_with_fallback(temp_path, target)
    return target
@@ -0,0 +1,26 @@
1
+ # File Import and Anonymization
2
+
3
+ Endoreg-Db imports are guarded by an anonymization step that is supposed to ensure that most sensitive data is redacted from the input. Here, fake patients are generated to pseudonymize the sensitive information in the data. This ensures that videos as well as PDFs are not distributed with sensitive data and that, if they are by accident, it is harder to tell which data is actually real.
4
+
5
+ The Import is handled by two orchestration files:
6
+
7
+ Report import service (RIS)
8
+
9
+ and
10
+
11
+ Video import Service (VIS)
12
+
13
+ The orchestration is abstracted out by the base import service (BIS), to ensure newly implemented data imports follow the same structure and to ensure tests run agnostically of the actual media being processed.
14
+
15
+ ## Import Order of Execution
16
+
17
+ The import starts when files are dropped into the corresponding media import folders. The locations need to be passed to the import service logic. To ensure atomic processing without overwhelming the server or processing a file twice when running in parallel, a file lock is added to the files that are currently being processed.
18
+
19
+ ### File Lock
20
+
21
+ The file lock is implemented as a context manager. By default, this means that during execution the files are marked by adding an additional .lock file inside the folder. Once the code wrapped in the file lock context manager stops executing, the .lock file is removed only after error processing has finished. This ensures that the full pipeline is executed on each run, even when interrupted.
22
+ https://book.pythontips.com/en/latest/context_managers.html
23
+
24
+ ### Error Cleanup
25
+
26
+ The ErrorCleanup class is called from inside the file lock context manager to avoid leaving half-processed files lying around. The file type is passed to the class instance, which then runs the correct processing logic.
@@ -0,0 +1,11 @@
1
+ # processing/__init__.py
2
+
3
+ from . import sensitive_meta_adapter
4
+ from . import report_processing
5
+ from . import video_processing
6
+
7
+ __all__ = [
8
+ "sensitive_meta_adapter",
9
+ "report_processing",
10
+ "video_processing",
11
+ ]
@@ -0,0 +1,94 @@
1
+ import logging
2
+ import os
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Literal, NoReturn
6
+
7
+ from lx_anonymizer import ReportReader
8
+ from lx_anonymizer.sensitive_meta_interface import SensitiveMeta as LxSM
9
+
10
+ from endoreg_db.import_files.context import ImportContext
11
+ from endoreg_db.import_files.file_storage.sensitive_meta_storage import sensitive_meta_storage
12
+ from endoreg_db.utils.paths import ANONYM_REPORT_DIR
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ReportAnonymizer:
    """Anonymize a report PDF with ``lx_anonymizer`` and store its extracted metadata."""

    def __init__(self):
        # Both attributes are (re)assigned by _ensure_report_reading_available().
        self._report_reader_available = False
        self._report_reader_class = None
        self._ensure_report_reading_available()
        # Holds the result of sensitive_meta_storage() after anonymize_report().
        self.storage = False

    def anonymize_report(self, ctx: "ImportContext"):
        """
        Run the report reader on ``ctx.file_path`` and fill the context.

        Writes ``original_text``, ``anonymized_text`` and ``anonymized_path``
        on ``ctx`` from the reader's result, and stores the extracted metadata
        for ``ctx.current_report`` via ``sensitive_meta_storage``.

        Args:
            ctx: Import context with ``current_report`` and ``file_path`` set.

        Returns:
            The same ``ctx`` object.

        Raises:
            ValueError: If ``ctx.current_report`` is None.
        """
        if ctx.current_report is None:
            # Explicit check instead of ``assert``, which is stripped under -O.
            raise ValueError("anonymize_report requires ctx.current_report to be set")

        # Setup anonymized directory
        anonymized_dir = ANONYM_REPORT_DIR
        anonymized_dir.mkdir(parents=True, exist_ok=True)

        # Generate output path for anonymized report
        pdf_hash = ctx.current_report.pdf_hash
        anonymized_output_path = anonymized_dir / f"{pdf_hash}.pdf"

        # Use the class resolved at construction time; fall back to the
        # module-level import. The instance stays local so that
        # _report_reader_class keeps holding a class, as its name says.
        reader_class = self._report_reader_class or ReportReader
        reader = reader_class()

        # Process with enhanced process_report method (returns 4-tuple now)
        ctx.original_text, ctx.anonymized_text, extracted_metadata, ctx.anonymized_path = reader.process_report(
            pdf_path=ctx.file_path,
            create_anonymized_pdf=True,
            anonymized_pdf_output_path=str(anonymized_output_path),
        )

        if ctx.anonymized_path:
            # Log whether the output really exists on disk, not just its type.
            logger.info(
                "DEBUG: after anonymizer, ctx.anonymized_path=%s (exists=%s)",
                ctx.anonymized_path,
                Path(ctx.anonymized_path).exists(),
            )

        sm = LxSM()
        sm.safe_update(extracted_metadata)

        self.storage = sensitive_meta_storage(sm, ctx.current_report)
        return ctx

    def _ensure_report_reading_available(
        self
    ) -> None:
        """
        Ensure report reading modules are available by adding lx-anonymizer to path.

        Tries a direct import of ``lx_anonymizer.ReportReader`` first. If that
        fails, the directory named by the ``LX_ANONYMIZER_PATH`` environment
        variable is added to ``sys.path`` (when set, existing and not yet
        listed) and the import is retried.

        Returns:
            None. The outcome is stored in ``self._report_reader_available``
            (bool) and ``self._report_reader_class`` (the class, or None).
        """
        try:
            # Try direct import first
            from lx_anonymizer import ReportReader
        except ImportError:
            pass
        else:
            logger.info("Successfully imported lx_anonymizer ReportReader module")
            self._report_reader_available = True
            self._report_reader_class = ReportReader
            return

        # Optional: honor LX_ANONYMIZER_PATH=/abs/path/to/src
        import importlib

        extra = os.getenv("LX_ANONYMIZER_PATH")
        if extra and extra not in sys.path and Path(extra).exists():
            sys.path.insert(0, extra)
            try:
                mod = importlib.import_module("lx_anonymizer")
                reader_class = getattr(mod, "ReportReader")
            except Exception as e:
                logger.warning(
                    "Failed importing lx_anonymizer via LX_ANONYMIZER_PATH: %s", e
                )
            else:
                logger.info(
                    "Imported lx_anonymizer.ReportReader via LX_ANONYMIZER_PATH"
                )
                self._report_reader_available = True
                self._report_reader_class = reader_class
                # Return here so the "unavailable" reset below cannot
                # overwrite a successful fallback import.
                return

        self._report_reader_available = False
        self._report_reader_class = None