endoreg-db 0.8.8.0__py3-none-any.whl → 0.8.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (402) hide show
  1. endoreg_db/data/__init__.py +22 -8
  2. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +0 -1
  3. endoreg_db/data/examination/examinations/data.yaml +114 -14
  4. endoreg_db/data/examination/time-type/data.yaml +0 -3
  5. endoreg_db/data/examination_indication/endoscopy.yaml +108 -173
  6. endoreg_db/data/examination_indication_classification/endoscopy.yaml +0 -70
  7. endoreg_db/data/examination_indication_classification_choice/endoscopy.yaml +33 -37
  8. endoreg_db/data/finding/00_generic.yaml +35 -0
  9. endoreg_db/data/finding/00_generic_complication.yaml +9 -0
  10. endoreg_db/data/finding/01_gastroscopy_baseline.yaml +88 -0
  11. endoreg_db/data/finding/01_gastroscopy_observation.yaml +113 -0
  12. endoreg_db/data/finding/02_colonoscopy_baseline.yaml +53 -0
  13. endoreg_db/data/finding/02_colonoscopy_hidden.yaml +119 -0
  14. endoreg_db/data/finding/02_colonoscopy_observation.yaml +152 -0
  15. endoreg_db/data/finding_classification/00_generic.yaml +44 -0
  16. endoreg_db/data/finding_classification/00_generic_histology.yaml +28 -0
  17. endoreg_db/data/finding_classification/00_generic_lesion.yaml +52 -0
  18. endoreg_db/data/finding_classification/{colonoscopy_bowel_preparation.yaml → 02_colonoscopy_baseline.yaml} +35 -20
  19. endoreg_db/data/finding_classification/02_colonoscopy_histology.yaml +13 -0
  20. endoreg_db/data/finding_classification/02_colonoscopy_other.yaml +12 -0
  21. endoreg_db/data/finding_classification/02_colonoscopy_polyp.yaml +101 -0
  22. endoreg_db/data/finding_classification_choice/{yes_no_na.yaml → 00_generic.yaml} +5 -1
  23. endoreg_db/data/finding_classification_choice/{examination_setting_generic_types.yaml → 00_generic_baseline.yaml} +10 -2
  24. endoreg_db/data/finding_classification_choice/{complication_generic_types.yaml → 00_generic_complication.yaml} +1 -1
  25. endoreg_db/data/finding_classification_choice/{histology.yaml → 00_generic_histology.yaml} +1 -4
  26. endoreg_db/data/finding_classification_choice/00_generic_lesion.yaml +158 -0
  27. endoreg_db/data/finding_classification_choice/{bowel_preparation.yaml → 02_colonoscopy_bowel_preparation.yaml} +1 -30
  28. endoreg_db/data/{_examples/finding_classification_choice/colonoscopy_not_complete_reason.yaml → finding_classification_choice/02_colonoscopy_generic.yaml} +1 -1
  29. endoreg_db/data/finding_classification_choice/{histology_polyp.yaml → 02_colonoscopy_histology.yaml} +1 -1
  30. endoreg_db/data/{_examples/finding_classification_choice/colonoscopy_location.yaml → finding_classification_choice/02_colonoscopy_location.yaml} +23 -4
  31. endoreg_db/data/finding_classification_choice/02_colonoscopy_other.yaml +34 -0
  32. endoreg_db/data/finding_classification_choice/02_colonoscopy_polyp_advanced_imaging.yaml +76 -0
  33. endoreg_db/data/{_examples/finding_classification_choice/colon_lesion_paris.yaml → finding_classification_choice/02_colonoscopy_polyp_morphology.yaml} +26 -8
  34. endoreg_db/data/finding_classification_choice/02_colonoscopy_size.yaml +27 -0
  35. endoreg_db/data/finding_classification_type/{colonoscopy_basic.yaml → 00_generic.yaml} +18 -13
  36. endoreg_db/data/finding_classification_type/02_colonoscopy.yaml +9 -0
  37. endoreg_db/data/finding_intervention/00_generic_endoscopy.yaml +59 -0
  38. endoreg_db/data/finding_intervention/00_generic_endoscopy_ablation.yaml +44 -0
  39. endoreg_db/data/finding_intervention/00_generic_endoscopy_bleeding.yaml +55 -0
  40. endoreg_db/data/finding_intervention/00_generic_endoscopy_resection.yaml +85 -0
  41. endoreg_db/data/finding_intervention/00_generic_endoscopy_stenosis.yaml +17 -0
  42. endoreg_db/data/finding_intervention/00_generic_endoscopy_stent.yaml +9 -0
  43. endoreg_db/data/finding_intervention/01_gastroscopy.yaml +19 -0
  44. endoreg_db/data/finding_intervention/04_eus.yaml +39 -0
  45. endoreg_db/data/finding_intervention/05_ercp.yaml +3 -0
  46. endoreg_db/data/finding_type/data.yaml +8 -12
  47. endoreg_db/data/requirement/01_patient_data.yaml +93 -0
  48. endoreg_db/data/requirement_operator/new_operators.yaml +36 -0
  49. endoreg_db/data/requirement_set/01_endoscopy_generic.yaml +0 -2
  50. endoreg_db/data/requirement_set/90_coloreg.yaml +20 -8
  51. endoreg_db/exceptions.py +0 -1
  52. endoreg_db/forms/examination_form.py +1 -1
  53. endoreg_db/helpers/data_loader.py +124 -52
  54. endoreg_db/helpers/default_objects.py +116 -81
  55. endoreg_db/import_files/__init__.py +27 -0
  56. endoreg_db/import_files/context/__init__.py +7 -0
  57. endoreg_db/import_files/context/default_sensitive_meta.py +81 -0
  58. endoreg_db/import_files/context/ensure_center.py +17 -0
  59. endoreg_db/import_files/context/file_lock.py +66 -0
  60. endoreg_db/import_files/context/import_context.py +43 -0
  61. endoreg_db/import_files/context/validate_directories.py +56 -0
  62. endoreg_db/import_files/file_storage/__init__.py +15 -0
  63. endoreg_db/import_files/file_storage/create_report_file.py +76 -0
  64. endoreg_db/import_files/file_storage/create_video_file.py +75 -0
  65. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +39 -0
  66. endoreg_db/import_files/file_storage/state_management.py +496 -0
  67. endoreg_db/import_files/file_storage/storage.py +36 -0
  68. endoreg_db/import_files/import_service.md +26 -0
  69. endoreg_db/import_files/processing/__init__.py +11 -0
  70. endoreg_db/import_files/processing/report_processing/report_anonymization.py +94 -0
  71. endoreg_db/import_files/processing/sensitive_meta_adapter.py +51 -0
  72. endoreg_db/import_files/processing/video_processing/video_anonymization.py +107 -0
  73. endoreg_db/import_files/pseudonymization/fake.py +52 -0
  74. endoreg_db/import_files/pseudonymization/k_anonymity.py +182 -0
  75. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +128 -0
  76. endoreg_db/import_files/pseudonymization/pseudonymize.py +0 -0
  77. endoreg_db/import_files/report_import_service.py +141 -0
  78. endoreg_db/import_files/video_import_service.py +150 -0
  79. endoreg_db/management/commands/import_report.py +130 -65
  80. endoreg_db/management/commands/import_video_with_classification.py +1 -1
  81. endoreg_db/management/commands/load_ai_model_data.py +5 -5
  82. endoreg_db/management/commands/load_ai_model_label_data.py +9 -7
  83. endoreg_db/management/commands/load_base_db_data.py +5 -134
  84. endoreg_db/management/commands/load_contraindication_data.py +14 -16
  85. endoreg_db/management/commands/load_disease_classification_choices_data.py +15 -18
  86. endoreg_db/management/commands/load_disease_classification_data.py +15 -18
  87. endoreg_db/management/commands/load_disease_data.py +25 -28
  88. endoreg_db/management/commands/load_endoscope_data.py +20 -27
  89. endoreg_db/management/commands/load_event_data.py +14 -16
  90. endoreg_db/management/commands/load_examination_data.py +31 -44
  91. endoreg_db/management/commands/load_examination_indication_data.py +20 -21
  92. endoreg_db/management/commands/load_finding_data.py +52 -80
  93. endoreg_db/management/commands/load_information_source.py +21 -23
  94. endoreg_db/management/commands/load_lab_value_data.py +17 -26
  95. endoreg_db/management/commands/load_medication_data.py +13 -12
  96. endoreg_db/management/commands/load_organ_data.py +15 -19
  97. endoreg_db/management/commands/load_pdf_type_data.py +19 -18
  98. endoreg_db/management/commands/load_profession_data.py +14 -17
  99. endoreg_db/management/commands/load_qualification_data.py +20 -23
  100. endoreg_db/management/commands/load_report_reader_flag_data.py +17 -19
  101. endoreg_db/management/commands/load_requirement_data.py +14 -20
  102. endoreg_db/management/commands/load_risk_data.py +7 -6
  103. endoreg_db/management/commands/load_shift_data.py +20 -23
  104. endoreg_db/management/commands/load_tag_data.py +8 -11
  105. endoreg_db/management/commands/load_unit_data.py +17 -19
  106. endoreg_db/management/commands/start_filewatcher.py +46 -37
  107. endoreg_db/management/commands/validate_video_files.py +1 -5
  108. endoreg_db/migrations/0001_initial.py +1360 -1812
  109. endoreg_db/models/administration/person/patient/patient.py +72 -46
  110. endoreg_db/models/label/__init__.py +2 -2
  111. endoreg_db/models/label/annotation/video_segmentation_annotation.py +18 -26
  112. endoreg_db/models/label/label_video_segment/label_video_segment.py +23 -1
  113. endoreg_db/models/media/pdf/raw_pdf.py +136 -64
  114. endoreg_db/models/media/pdf/report_reader/report_reader_config.py +34 -10
  115. endoreg_db/models/media/processing_history/__init__.py +5 -0
  116. endoreg_db/models/media/processing_history/processing_history.py +96 -0
  117. endoreg_db/models/media/video/create_from_file.py +101 -31
  118. endoreg_db/models/media/video/video_file.py +125 -105
  119. endoreg_db/models/media/video/video_file_io.py +31 -26
  120. endoreg_db/models/medical/contraindication/README.md +1 -0
  121. endoreg_db/models/medical/examination/examination.py +28 -8
  122. endoreg_db/models/medical/examination/examination_indication.py +13 -79
  123. endoreg_db/models/medical/examination/examination_time.py +8 -3
  124. endoreg_db/models/medical/finding/finding.py +5 -12
  125. endoreg_db/models/medical/finding/finding_classification.py +18 -37
  126. endoreg_db/models/medical/finding/finding_intervention.py +7 -9
  127. endoreg_db/models/medical/hardware/endoscope.py +6 -0
  128. endoreg_db/models/medical/patient/medication_examples.py +5 -1
  129. endoreg_db/models/medical/patient/patient_finding.py +1 -1
  130. endoreg_db/models/metadata/pdf_meta.py +22 -10
  131. endoreg_db/models/metadata/sensitive_meta.py +3 -0
  132. endoreg_db/models/metadata/sensitive_meta_logic.py +200 -124
  133. endoreg_db/models/other/information_source.py +27 -6
  134. endoreg_db/models/report/__init__.py +0 -0
  135. endoreg_db/models/report/images.py +0 -0
  136. endoreg_db/models/report/report.py +6 -0
  137. endoreg_db/models/requirement/requirement.py +59 -399
  138. endoreg_db/models/requirement/requirement_operator.py +86 -98
  139. endoreg_db/models/state/audit_ledger.py +4 -5
  140. endoreg_db/models/state/raw_pdf.py +69 -30
  141. endoreg_db/models/state/video.py +65 -49
  142. endoreg_db/models/upload_job.py +33 -9
  143. endoreg_db/models/utils.py +27 -23
  144. endoreg_db/queries/__init__.py +3 -1
  145. endoreg_db/schemas/examination_evaluation.py +1 -1
  146. endoreg_db/serializers/__init__.py +2 -8
  147. endoreg_db/serializers/label_video_segment/label_video_segment.py +2 -29
  148. endoreg_db/serializers/meta/__init__.py +1 -6
  149. endoreg_db/serializers/misc/sensitive_patient_data.py +50 -26
  150. endoreg_db/serializers/patient_examination/patient_examination.py +3 -3
  151. endoreg_db/serializers/pdf/anony_text_validation.py +39 -23
  152. endoreg_db/serializers/video/video_file_list.py +65 -34
  153. endoreg_db/services/__old/pdf_import.py +1487 -0
  154. endoreg_db/services/__old/video_import.py +1306 -0
  155. endoreg_db/services/anonymization.py +63 -26
  156. endoreg_db/services/lookup_service.py +28 -28
  157. endoreg_db/services/lookup_store.py +2 -2
  158. endoreg_db/services/pdf_import.py +0 -1480
  159. endoreg_db/services/report_import.py +10 -0
  160. endoreg_db/services/video_import.py +6 -1165
  161. endoreg_db/tasks/upload_tasks.py +79 -70
  162. endoreg_db/tasks/video_ingest.py +8 -4
  163. endoreg_db/urls/__init__.py +0 -14
  164. endoreg_db/urls/ai.py +32 -0
  165. endoreg_db/urls/media.py +21 -24
  166. endoreg_db/utils/dataloader.py +87 -57
  167. endoreg_db/utils/paths.py +110 -46
  168. endoreg_db/utils/pipelines/Readme.md +1 -1
  169. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +97 -0
  170. endoreg_db/utils/video/ffmpeg_wrapper.py +217 -52
  171. endoreg_db/views/__init__.py +85 -173
  172. endoreg_db/views/ai/__init__.py +8 -0
  173. endoreg_db/views/ai/label.py +155 -0
  174. endoreg_db/views/anonymization/media_management.py +8 -7
  175. endoreg_db/views/anonymization/overview.py +97 -68
  176. endoreg_db/views/anonymization/validate.py +25 -21
  177. endoreg_db/views/media/__init__.py +5 -20
  178. endoreg_db/views/media/pdf_media.py +109 -65
  179. endoreg_db/views/media/sensitive_metadata.py +163 -148
  180. endoreg_db/views/meta/__init__.py +0 -8
  181. endoreg_db/views/misc/__init__.py +1 -7
  182. endoreg_db/views/misc/upload_views.py +94 -93
  183. endoreg_db/views/report/__init__.py +7 -0
  184. endoreg_db/views/{pdf → report}/reimport.py +45 -24
  185. endoreg_db/views/{pdf/pdf_stream.py → report/report_stream.py} +40 -32
  186. endoreg_db/views/requirement/lookup_store.py +22 -90
  187. endoreg_db/views/video/__init__.py +23 -22
  188. endoreg_db/views/video/correction.py +201 -172
  189. endoreg_db/views/video/reimport.py +1 -1
  190. endoreg_db/views/{media/video_segments.py → video/segments_crud.py} +75 -37
  191. endoreg_db/views/video/{video_meta.py → video_meta_stats.py} +2 -2
  192. endoreg_db/views/video/video_stream.py +7 -8
  193. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/METADATA +2 -2
  194. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/RECORD +217 -335
  195. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/WHEEL +1 -1
  196. endoreg_db/data/_examples/disease.yaml +0 -55
  197. endoreg_db/data/_examples/disease_classification.yaml +0 -13
  198. endoreg_db/data/_examples/disease_classification_choice.yaml +0 -62
  199. endoreg_db/data/_examples/event.yaml +0 -64
  200. endoreg_db/data/_examples/examination.yaml +0 -72
  201. endoreg_db/data/_examples/finding/anatomy_colon.yaml +0 -128
  202. endoreg_db/data/_examples/finding/colonoscopy.yaml +0 -40
  203. endoreg_db/data/_examples/finding/colonoscopy_bowel_prep.yaml +0 -56
  204. endoreg_db/data/_examples/finding/complication.yaml +0 -16
  205. endoreg_db/data/_examples/finding/data.yaml +0 -105
  206. endoreg_db/data/_examples/finding/examination_setting.yaml +0 -16
  207. endoreg_db/data/_examples/finding/medication_related.yaml +0 -18
  208. endoreg_db/data/_examples/finding/outcome.yaml +0 -12
  209. endoreg_db/data/_examples/finding_classification/colonoscopy_bowel_preparation.yaml +0 -68
  210. endoreg_db/data/_examples/finding_classification/colonoscopy_jnet.yaml +0 -22
  211. endoreg_db/data/_examples/finding_classification/colonoscopy_kudo.yaml +0 -25
  212. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_circularity.yaml +0 -20
  213. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_planarity.yaml +0 -24
  214. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_size.yaml +0 -68
  215. endoreg_db/data/_examples/finding_classification/colonoscopy_lesion_surface.yaml +0 -20
  216. endoreg_db/data/_examples/finding_classification/colonoscopy_location.yaml +0 -80
  217. endoreg_db/data/_examples/finding_classification/colonoscopy_lst.yaml +0 -21
  218. endoreg_db/data/_examples/finding_classification/colonoscopy_nice.yaml +0 -20
  219. endoreg_db/data/_examples/finding_classification/colonoscopy_paris.yaml +0 -26
  220. endoreg_db/data/_examples/finding_classification/colonoscopy_sano.yaml +0 -22
  221. endoreg_db/data/_examples/finding_classification/colonoscopy_summary.yaml +0 -53
  222. endoreg_db/data/_examples/finding_classification/complication_generic.yaml +0 -25
  223. endoreg_db/data/_examples/finding_classification/examination_setting_generic.yaml +0 -40
  224. endoreg_db/data/_examples/finding_classification/histology_colo.yaml +0 -51
  225. endoreg_db/data/_examples/finding_classification/intervention_required.yaml +0 -26
  226. endoreg_db/data/_examples/finding_classification/medication_related.yaml +0 -23
  227. endoreg_db/data/_examples/finding_classification/visualized.yaml +0 -33
  228. endoreg_db/data/_examples/finding_classification_choice/bowel_preparation.yaml +0 -78
  229. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_circularity_default.yaml +0 -32
  230. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_jnet.yaml +0 -15
  231. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_kudo.yaml +0 -23
  232. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_lst.yaml +0 -15
  233. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_nice.yaml +0 -17
  234. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_planarity_default.yaml +0 -49
  235. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_sano.yaml +0 -14
  236. endoreg_db/data/_examples/finding_classification_choice/colon_lesion_surface_intact_default.yaml +0 -36
  237. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_size.yaml +0 -82
  238. endoreg_db/data/_examples/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +0 -15
  239. endoreg_db/data/_examples/finding_classification_choice/complication_generic_types.yaml +0 -15
  240. endoreg_db/data/_examples/finding_classification_choice/examination_setting_generic_types.yaml +0 -15
  241. endoreg_db/data/_examples/finding_classification_choice/histology.yaml +0 -24
  242. endoreg_db/data/_examples/finding_classification_choice/histology_polyp.yaml +0 -20
  243. endoreg_db/data/_examples/finding_classification_choice/outcome.yaml +0 -19
  244. endoreg_db/data/_examples/finding_classification_choice/yes_no_na.yaml +0 -11
  245. endoreg_db/data/_examples/finding_classification_type/colonoscopy_basic.yaml +0 -48
  246. endoreg_db/data/_examples/finding_intervention/endoscopy.yaml +0 -43
  247. endoreg_db/data/_examples/finding_intervention/endoscopy_colonoscopy.yaml +0 -168
  248. endoreg_db/data/_examples/finding_intervention/endoscopy_egd.yaml +0 -128
  249. endoreg_db/data/_examples/finding_intervention/endoscopy_ercp.yaml +0 -32
  250. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_lower.yaml +0 -9
  251. endoreg_db/data/_examples/finding_intervention/endoscopy_eus_upper.yaml +0 -36
  252. endoreg_db/data/_examples/finding_intervention_type/endoscopy.yaml +0 -15
  253. endoreg_db/data/_examples/finding_type/data.yaml +0 -43
  254. endoreg_db/data/_examples/requirement/age.yaml +0 -26
  255. endoreg_db/data/_examples/requirement/gender.yaml +0 -25
  256. endoreg_db/data/_examples/requirement_set/01_endoscopy_generic.yaml +0 -48
  257. endoreg_db/data/_examples/requirement_set/colonoscopy_austria_screening.yaml +0 -57
  258. endoreg_db/data/_examples/requirement_set/endoscopy_bleeding_risk.yaml +0 -52
  259. endoreg_db/data/_examples/yaml_examples.xlsx +0 -0
  260. endoreg_db/data/finding/anatomy_colon.yaml +0 -128
  261. endoreg_db/data/finding/colonoscopy.yaml +0 -40
  262. endoreg_db/data/finding/colonoscopy_bowel_prep.yaml +0 -56
  263. endoreg_db/data/finding/complication.yaml +0 -16
  264. endoreg_db/data/finding/data.yaml +0 -105
  265. endoreg_db/data/finding/examination_setting.yaml +0 -16
  266. endoreg_db/data/finding/medication_related.yaml +0 -18
  267. endoreg_db/data/finding/outcome.yaml +0 -12
  268. endoreg_db/data/finding_classification/colonoscopy_jnet.yaml +0 -22
  269. endoreg_db/data/finding_classification/colonoscopy_kudo.yaml +0 -25
  270. endoreg_db/data/finding_classification/colonoscopy_lesion_circularity.yaml +0 -20
  271. endoreg_db/data/finding_classification/colonoscopy_lesion_planarity.yaml +0 -24
  272. endoreg_db/data/finding_classification/colonoscopy_lesion_size.yaml +0 -38
  273. endoreg_db/data/finding_classification/colonoscopy_lesion_surface.yaml +0 -20
  274. endoreg_db/data/finding_classification/colonoscopy_location.yaml +0 -49
  275. endoreg_db/data/finding_classification/colonoscopy_lst.yaml +0 -21
  276. endoreg_db/data/finding_classification/colonoscopy_nice.yaml +0 -20
  277. endoreg_db/data/finding_classification/colonoscopy_paris.yaml +0 -26
  278. endoreg_db/data/finding_classification/colonoscopy_sano.yaml +0 -22
  279. endoreg_db/data/finding_classification/colonoscopy_summary.yaml +0 -53
  280. endoreg_db/data/finding_classification/complication_generic.yaml +0 -25
  281. endoreg_db/data/finding_classification/examination_setting_generic.yaml +0 -40
  282. endoreg_db/data/finding_classification/histology_colo.yaml +0 -43
  283. endoreg_db/data/finding_classification/intervention_required.yaml +0 -26
  284. endoreg_db/data/finding_classification/medication_related.yaml +0 -23
  285. endoreg_db/data/finding_classification/visualized.yaml +0 -33
  286. endoreg_db/data/finding_classification_choice/colon_lesion_circularity_default.yaml +0 -32
  287. endoreg_db/data/finding_classification_choice/colon_lesion_jnet.yaml +0 -15
  288. endoreg_db/data/finding_classification_choice/colon_lesion_kudo.yaml +0 -23
  289. endoreg_db/data/finding_classification_choice/colon_lesion_lst.yaml +0 -15
  290. endoreg_db/data/finding_classification_choice/colon_lesion_nice.yaml +0 -17
  291. endoreg_db/data/finding_classification_choice/colon_lesion_paris.yaml +0 -57
  292. endoreg_db/data/finding_classification_choice/colon_lesion_planarity_default.yaml +0 -49
  293. endoreg_db/data/finding_classification_choice/colon_lesion_sano.yaml +0 -14
  294. endoreg_db/data/finding_classification_choice/colon_lesion_surface_intact_default.yaml +0 -36
  295. endoreg_db/data/finding_classification_choice/colonoscopy_location.yaml +0 -229
  296. endoreg_db/data/finding_classification_choice/colonoscopy_not_complete_reason.yaml +0 -19
  297. endoreg_db/data/finding_classification_choice/colonoscopy_size.yaml +0 -82
  298. endoreg_db/data/finding_classification_choice/colonoscopy_summary_worst_finding.yaml +0 -15
  299. endoreg_db/data/finding_classification_choice/outcome.yaml +0 -19
  300. endoreg_db/data/finding_intervention/endoscopy.yaml +0 -43
  301. endoreg_db/data/finding_intervention/endoscopy_colonoscopy.yaml +0 -168
  302. endoreg_db/data/finding_intervention/endoscopy_egd.yaml +0 -128
  303. endoreg_db/data/finding_intervention/endoscopy_ercp.yaml +0 -32
  304. endoreg_db/data/finding_intervention/endoscopy_eus_lower.yaml +0 -9
  305. endoreg_db/data/finding_intervention/endoscopy_eus_upper.yaml +0 -36
  306. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +0 -79
  307. endoreg_db/data/requirement/age.yaml +0 -26
  308. endoreg_db/data/requirement/colonoscopy_baseline_austria.yaml +0 -45
  309. endoreg_db/data/requirement/disease_cardiovascular.yaml +0 -79
  310. endoreg_db/data/requirement/disease_classification_choice_cardiovascular.yaml +0 -41
  311. endoreg_db/data/requirement/disease_hepatology.yaml +0 -12
  312. endoreg_db/data/requirement/disease_misc.yaml +0 -12
  313. endoreg_db/data/requirement/disease_renal.yaml +0 -96
  314. endoreg_db/data/requirement/endoscopy_bleeding_risk.yaml +0 -59
  315. endoreg_db/data/requirement/event_cardiology.yaml +0 -251
  316. endoreg_db/data/requirement/event_requirements.yaml +0 -145
  317. endoreg_db/data/requirement/finding_colon_polyp.yaml +0 -50
  318. endoreg_db/data/requirement/gender.yaml +0 -25
  319. endoreg_db/data/requirement/lab_value.yaml +0 -441
  320. endoreg_db/data/requirement/medication.yaml +0 -93
  321. endoreg_db/data/requirement_operator/age.yaml +0 -13
  322. endoreg_db/data/requirement_operator/lab_operators.yaml +0 -129
  323. endoreg_db/data/requirement_operator/model_operators.yaml +0 -96
  324. endoreg_db/management/commands/init_default_ai_model.py +0 -112
  325. endoreg_db/management/commands/reset_celery_schedule.py +0 -9
  326. endoreg_db/management/commands/validate_video.py +0 -204
  327. endoreg_db/migrations/0002_requirementset_depends_on.py +0 -18
  328. endoreg_db/migrations/_old/0001_initial.py +0 -1857
  329. endoreg_db/migrations/_old/0002_add_video_correction_models.py +0 -52
  330. endoreg_db/migrations/_old/0003_add_center_display_name.py +0 -30
  331. endoreg_db/migrations/_old/0004_employee_city_employee_post_code_employee_street_and_more.py +0 -68
  332. endoreg_db/migrations/_old/0004_remove_casetemplate_rules_and_more.py +0 -77
  333. endoreg_db/migrations/_old/0005_merge_20251111_1003.py +0 -14
  334. endoreg_db/migrations/_old/0006_sensitivemeta_anonymized_text_and_more.py +0 -68
  335. endoreg_db/migrations/_old/0007_remove_rule_attribute_dtype_remove_rule_rule_type_and_more.py +0 -89
  336. endoreg_db/migrations/_old/0008_remove_event_event_classification_and_more.py +0 -27
  337. endoreg_db/migrations/_old/0009_alter_modelmeta_options_and_more.py +0 -21
  338. endoreg_db/renames.yml +0 -8
  339. endoreg_db/serializers/_old/raw_pdf_meta_validation.py +0 -223
  340. endoreg_db/serializers/_old/raw_video_meta_validation.py +0 -179
  341. endoreg_db/serializers/_old/video.py +0 -71
  342. endoreg_db/serializers/meta/pdf_file_meta_extraction.py +0 -115
  343. endoreg_db/serializers/meta/report_meta.py +0 -53
  344. endoreg_db/serializers/report/__init__.py +0 -9
  345. endoreg_db/serializers/report/mixins.py +0 -45
  346. endoreg_db/serializers/report/report.py +0 -105
  347. endoreg_db/serializers/report/report_list.py +0 -22
  348. endoreg_db/serializers/report/secure_file_url.py +0 -26
  349. endoreg_db/services/requirements_object.py +0 -147
  350. endoreg_db/services/storage_aware_video_processor.py +0 -370
  351. endoreg_db/urls/files.py +0 -6
  352. endoreg_db/urls/label_video_segment_validate.py +0 -33
  353. endoreg_db/urls/label_video_segments.py +0 -46
  354. endoreg_db/views/label/__init__.py +0 -5
  355. endoreg_db/views/label/label.py +0 -15
  356. endoreg_db/views/label_video_segment/__init__.py +0 -16
  357. endoreg_db/views/label_video_segment/create_lvs_from_annotation.py +0 -44
  358. endoreg_db/views/label_video_segment/get_lvs_by_name_and_video.py +0 -50
  359. endoreg_db/views/label_video_segment/label_video_segment.py +0 -77
  360. endoreg_db/views/label_video_segment/label_video_segment_by_label.py +0 -174
  361. endoreg_db/views/label_video_segment/label_video_segment_detail.py +0 -73
  362. endoreg_db/views/label_video_segment/update_lvs_from_annotation.py +0 -46
  363. endoreg_db/views/label_video_segment/validate.py +0 -226
  364. endoreg_db/views/media/segments.py +0 -71
  365. endoreg_db/views/meta/available_files_list.py +0 -146
  366. endoreg_db/views/meta/report_meta.py +0 -53
  367. endoreg_db/views/meta/sensitive_meta_detail.py +0 -85
  368. endoreg_db/views/misc/secure_file_serving_view.py +0 -80
  369. endoreg_db/views/misc/secure_file_url_view.py +0 -84
  370. endoreg_db/views/misc/secure_url_validate.py +0 -79
  371. endoreg_db/views/patient_examination/DEPRECATED_video_backup.py +0 -164
  372. endoreg_db/views/patient_finding_location/__init__.py +0 -5
  373. endoreg_db/views/patient_finding_location/pfl_create.py +0 -70
  374. endoreg_db/views/patient_finding_morphology/__init__.py +0 -5
  375. endoreg_db/views/patient_finding_morphology/pfm_create.py +0 -70
  376. endoreg_db/views/pdf/__init__.py +0 -8
  377. endoreg_db/views/video/segmentation.py +0 -274
  378. endoreg_db/views/video/task_status.py +0 -49
  379. endoreg_db/views/video/timeline.py +0 -46
  380. endoreg_db/views/video/video_analyze.py +0 -52
  381. /endoreg_db/data/requirement/{colon_polyp_intervention.yaml → old/colon_polyp_intervention.yaml} +0 -0
  382. /endoreg_db/data/{_examples/requirement → requirement/old}/colonoscopy_baseline_austria.yaml +0 -0
  383. /endoreg_db/data/requirement/{coloreg_colon_polyp.yaml → old/coloreg_colon_polyp.yaml} +0 -0
  384. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_cardiovascular.yaml +0 -0
  385. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_classification_choice_cardiovascular.yaml +0 -0
  386. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_hepatology.yaml +0 -0
  387. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_misc.yaml +0 -0
  388. /endoreg_db/data/{_examples/requirement → requirement/old}/disease_renal.yaml +0 -0
  389. /endoreg_db/data/{_examples/requirement → requirement/old}/endoscopy_bleeding_risk.yaml +0 -0
  390. /endoreg_db/data/{_examples/requirement → requirement/old}/event_cardiology.yaml +0 -0
  391. /endoreg_db/data/{_examples/requirement → requirement/old}/event_requirements.yaml +0 -0
  392. /endoreg_db/data/{_examples/requirement → requirement/old}/finding_colon_polyp.yaml +0 -0
  393. /endoreg_db/{urls/sensitive_meta.py → data/requirement/old/gender.yaml} +0 -0
  394. /endoreg_db/data/{_examples/requirement → requirement/old}/lab_value.yaml +0 -0
  395. /endoreg_db/data/{_examples/requirement → requirement/old}/medication.yaml +0 -0
  396. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/age.yaml +0 -0
  397. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/lab_operators.yaml +0 -0
  398. /endoreg_db/data/{_examples/requirement_operator → requirement_operator/_old}/model_operators.yaml +0 -0
  399. /endoreg_db/{views/pdf/pdf_stream_views.py → import_files/pseudonymization/__init__.py} +0 -0
  400. /endoreg_db/utils/requirement_operator_logic/{lab_value_operators.py → _old/lab_value_operators.py} +0 -0
  401. /endoreg_db/utils/requirement_operator_logic/{model_evaluators.py → _old/model_evaluators.py} +0 -0
  402. {endoreg_db-0.8.8.0.dist-info → endoreg_db-0.8.9.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,51 @@
1
+ # endoreg_db/import_files/processing/sensitive_meta_adapter.py
2
+ from typing import Any, Dict
3
+
4
+ from lx_anonymizer.sensitive_meta_interface import SensitiveMeta as LxSensitiveMeta
5
+
6
+
7
def normalize_lx_sensitive_meta(meta: "LxSensitiveMeta") -> Dict[str, Any]:
    """
    Convert an lx_anonymizer SensitiveMeta into a plain dict keyed the way
    endoreg_db's SensitiveMeta update/create helpers expect.

    - Reads the data via ``meta.to_dict()``; keys not listed below are ignored.
    - Renames ``patient_gender_name`` -> ``patient_gender`` and
      ``center`` -> ``center_name``; every other listed key keeps its name.
    - Drops blank values: ``None``, empty or whitespace-only strings, and
      empty lists. The same rule is applied to every key.
    - Values that are kept are passed through unchanged (no parsing or
      type conversion happens here).

    Args:
        meta: Object exposing ``to_dict()`` that returns a mapping of field
            names to values.

    Returns:
        A new dict containing only the non-blank, renamed fields.
    """

    def _is_blank(value: Any) -> bool:
        # One definition of "blank" shared by all keys.
        if value is None:
            return True
        if isinstance(value, str):
            # Whitespace-only strings carry no information either.
            return not value.strip()
        return isinstance(value, list) and not value

    raw = meta.to_dict()

    # Interface key -> key used on the endoreg_db side.
    # Insertion order defines the key order of the returned dict.
    key_map = {
        "file_path": "file_path",
        "patient_first_name": "patient_first_name",
        "patient_last_name": "patient_last_name",
        "patient_dob": "patient_dob",  # passed through as-is, not parsed here
        "casenumber": "casenumber",
        "examination_date": "examination_date",  # passed through as-is
        "examination_time": "examination_time",  # passed through as-is
        "examiner_first_name": "examiner_first_name",
        "examiner_last_name": "examiner_last_name",
        "text": "text",
        "anonymized_text": "anonymized_text",
        "endoscope_type": "endoscope_type",
        "endoscope_sn": "endoscope_sn",
        "patient_gender_name": "patient_gender",
        "center": "center_name",
    }

    out: Dict[str, Any] = {}
    for source_key, target_key in key_map.items():
        value = raw.get(source_key)
        if not _is_blank(value):
            out[target_key] = value
    return out
@@ -0,0 +1,107 @@
1
+ from typing import List, Dict, Any, Tuple, Optional
2
+
3
+ import logging
4
+ logger = logging.getLogger(__name__)
5
+
6
+ from lx_anonymizer import FrameCleaner
7
+ from lx_anonymizer.sensitive_meta_interface import SensitiveMeta as LxSM
8
+
9
+
10
+ from endoreg_db.import_files.file_storage.sensitive_meta_storage import sensitive_meta_storage
11
+ from endoreg_db.import_files.context import ImportContext
12
+ from endoreg_db.utils.paths import ANONYM_VIDEO_DIR
13
+ from endoreg_db.models import EndoscopyProcessor, VideoFile
14
+
15
+
16
class VideoAnonymizer:
    """Anonymize an imported video with lx_anonymizer's ``FrameCleaner``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Defaults must be set BEFORE the availability check: the check stores
        # its result in these attributes, and initialising them afterwards
        # (as was done previously) silently discarded that result.
        self._frame_cleaning_available = None  # True once FrameCleaner imported
        self._frame_cleaning_class = None  # most recently created FrameCleaner
        self.storage = False  # result of the last sensitive_meta_storage() call
        self._ensure_frame_cleaning_available()

    def anonymize_video(self, ctx: ImportContext):
        """Clean the video referenced by ``ctx`` and store the extracted metadata.

        The cleaned video is written to ``ANONYM_VIDEO_DIR/<video_hash>.mp4``.
        ``ctx.anonymized_path`` is set from the first element returned by
        ``FrameCleaner.clean_video``; the second element (extracted metadata)
        is loaded into an lx_anonymizer ``SensitiveMeta`` and handed to
        ``sensitive_meta_storage`` together with ``ctx.current_video``.

        Args:
            ctx: Import context; ``ctx.current_video`` must be set.

        Returns:
            The same ``ctx`` object, updated in place.
        """
        # Make sure the output directory for anonymized videos exists.
        anonymized_dir = ANONYM_VIDEO_DIR
        anonymized_dir.mkdir(parents=True, exist_ok=True)
        assert ctx.current_video is not None

        # The output file name is derived from the video hash.
        video_hash = ctx.current_video.video_hash
        anonymized_output_path = anonymized_dir / f"{video_hash}.mp4"

        # A fresh cleaner is created for every video.
        cleaner = FrameCleaner()
        self._frame_cleaning_class = cleaner

        endoscope_roi, endoscope_roi_nested = self._get_processor_roi_info(ctx)

        # clean_video returns (anonymized_path, extracted_metadata).
        ctx.anonymized_path, extracted_metadata = cleaner.clean_video(
            video_path=ctx.file_path,
            endoscope_image_roi=endoscope_roi,
            endoscope_data_roi_nested=endoscope_roi_nested,
            output_path=anonymized_output_path,
        )

        sm = LxSM()
        sm.safe_update(extracted_metadata)

        self.storage = sensitive_meta_storage(sm, ctx.current_video)
        return ctx

    def _ensure_frame_cleaning_available(self):
        """Verify that ``lx_anonymizer.FrameCleaner`` can be imported.

        On success a ``FrameCleaner`` instance is stored in
        ``self._frame_cleaning_class`` and ``self._frame_cleaning_available``
        is set to ``True``. Nothing is returned.

        Raises:
            Exception: whatever the import raised, re-raised unchanged after a
                warning has been logged.
        """
        try:
            from lx_anonymizer import FrameCleaner
        except Exception as e:
            logger.warning(
                "Frame cleaning not available: %s Please install or update lx_anonymizer.",
                e,
            )
            raise

        self._frame_cleaning_class = FrameCleaner()
        self._frame_cleaning_available = True

    def _get_processor_roi_info(
        self,
        ctx: ImportContext,
    ) -> tuple[dict[str, int | None] | None,
               dict[str, dict[str, int | None] | None] | None]:
        """Get processor ROI information for masking and data extraction.

        Looks up the ``EndoscopyProcessor`` named by ``ctx.processor_name`` and
        reads its endoscope image ROI and sensitive ROIs. Any failure during
        the lookup is logged and results in ``None`` values (best effort).

        Returns:
            ``(endoscope_image_roi, endoscope_data_roi_nested)``; either may be
            ``None`` when no processor name is set or the lookup failed.
        """
        endoscope_data_roi_nested = None
        endoscope_image_roi = None

        video = ctx.current_video
        assert isinstance(video, VideoFile)

        try:
            processor_name = ctx.processor_name or None
            if processor_name:
                # NOTE(review): get_by_name is called on a fresh, unsaved
                # EndoscopyProcessor instance - confirm this is intended.
                pr = EndoscopyProcessor()
                processor = pr.get_by_name(processor_name)
                assert isinstance(processor, EndoscopyProcessor), (
                    "Processor is not of type EndoscopyProcessor"
                )
                endoscope_image_roi = processor.get_roi_endoscope_image()
                endoscope_data_roi_nested = processor.get_sensitive_rois()
                logger.info(
                    "Retrieved processor ROI information: endoscope_image_roi=%s",
                    endoscope_image_roi,
                )
            else:
                logger.warning(
                    "No processor found for video %s, proceeding without ROI masking",
                    video.uuid,
                )
        except Exception as exc:
            # Deliberate best effort: cleaning proceeds without ROI information.
            logger.error("Failed to retrieve processor ROI information: %s", exc)

        # IMPORTANT: return order must match the unpacking in anonymize_video.
        return endoscope_image_roi, endoscope_data_roi_nested
@@ -0,0 +1,52 @@
1
+ from datetime import date, timedelta
2
+ from typing import Tuple, Optional
3
+ from faker import Faker
4
+ import random
5
+
6
+
7
def fake_name_with_similar_dob_and_gender(
    gender: str,
    dob: date,
    *,
    year_tolerance: int = 3,
    locale: str = "de_DE",
    seed: Optional[int] = None,
) -> Tuple[str, str, date]:
    """
    Generate a fake name with the same gender and a similar date of birth.

    Args:
        gender: "male" or "female"
        dob: Original date of birth
        year_tolerance: Maximum DOB shift in years (a year counts as 365 days);
            must not be negative
        locale: Faker locale (default: German)
        seed: Optional reproducible seed

    Returns:
        (first_name, last_name, fake_dob)

    Raises:
        ValueError: if ``gender`` is not "male"/"female" or ``year_tolerance``
            is negative.
    """
    if gender not in {"male", "female"}:
        raise ValueError("gender must be 'male' or 'female'")
    if year_tolerance < 0:
        raise ValueError("year_tolerance must be non-negative")

    fake = Faker(locale)

    if seed is not None:
        # NOTE(review): Faker.seed is a class-level call - confirm that seeding
        # at class scope (rather than this instance only) is intended.
        Faker.seed(seed)
        # A private generator yields the same offset as reseeding the global
        # `random` module would, without disturbing other users of `random`.
        rng = random.Random(seed)
    else:
        rng = random

    # --- Generate gender-safe name ---
    if gender == "male":
        first_name = fake.first_name_male()
    else:
        first_name = fake.first_name_female()
    last_name = fake.last_name()

    # --- Generate similar DOB ---
    days_range = year_tolerance * 365
    offset_days = rng.randint(-days_range, days_range)
    fake_dob = dob + timedelta(days=offset_days)

    return first_name, last_name, fake_dob
@@ -0,0 +1,182 @@
1
+ from endoreg_db.models import Patient, SensitiveMeta, Center, Gender
2
+ import logging
3
+ from datetime import timedelta
4
+ from typing import Tuple
5
+
6
+ from django.db.models import QuerySet
7
+
8
+ from itertools import combinations
9
+ from typing import Dict, Tuple, List
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
# Quasi-identifier names; get_k_profile_for_instance maps each one to the
# matching ``use_<name>`` switch of _build_sensitive_meta_qi_queryset.
QI_FLAGS = ["first_name", "last_name", "center", "gender", "dob_band"]
15
+
16
+
17
def get_k_profile_for_instance(
    instance: SensitiveMeta,
    *,
    dob_year_tolerance: int = 1,
    include_self: bool = True,
) -> Dict[Tuple[str, ...], int]:
    """
    Compute the equivalence-class size k of ``instance`` for every non-empty
    combination of the quasi-identifiers listed in QI_FLAGS.

    One count query is issued per combination.

    Returns:
        A mapping from the combination (tuple of flag names, in QI_FLAGS
        order) to its k value, e.g.
        {
            ('first_name',): 12,
            ('center', 'gender'): 45,
            ('first_name', 'last_name', 'dob_band'): 3,
            ...
        }
    """
    every_subset = (
        chosen
        for size in range(1, len(QI_FLAGS) + 1)
        for chosen in combinations(QI_FLAGS, size)
    )

    profile: Dict[Tuple[str, ...], int] = {}
    for chosen in every_subset:
        matching = _build_sensitive_meta_qi_queryset(
            instance,
            dob_year_tolerance=dob_year_tolerance,
            include_self=include_self,
            use_first_name="first_name" in chosen,
            use_last_name="last_name" in chosen,
            use_center="center" in chosen,
            use_gender="gender" in chosen,
            use_dob_band="dob_band" in chosen,
        )
        profile[chosen] = matching.count()

    return profile
60
+
61
+
62
def get_k_anonymity(pk, k=3):
    """
    Report how many records share the quasi-identifiers of one SensitiveMeta.

    Thin wrapper around get_k_anonymity_for_sensitive_meta with the DOB
    tolerance fixed at one year.

    Args:
        pk: Primary key of the SensitiveMeta record to evaluate.
        k (int, optional): Anonymity threshold to compare against. Defaults to 3.

    Returns:
        The result of get_k_anonymity_for_sensitive_meta, unchanged.
    """
    outcome = get_k_anonymity_for_sensitive_meta(
        pk=pk,
        k=k,
        dob_year_tolerance=1,
    )
    return outcome
72
+
73
+
74
def _build_sensitive_meta_qi_queryset(
    instance: SensitiveMeta,
    *,
    dob_year_tolerance: int = 1,
    include_self: bool = True,
    use_first_name: bool = True,
    use_last_name: bool = True,
    use_center: bool = True,
    use_gender: bool = True,
    use_dob_band: bool = True,
) -> QuerySet[SensitiveMeta]:
    """
    Build a queryset of SensitiveMeta records that are indistinguishable from
    `instance` on the chosen quasi-identifiers:

    - same patient_first_name
    - same patient_last_name
    - same center
    - same patient_gender
    - patient_dob within ±dob_year_tolerance years (approx via 365-day years)

    A quasi-identifier is only applied when its ``use_*`` switch is True AND
    the corresponding value on `instance` is not None; otherwise it does not
    restrict the result.

    Args:
        instance: The SensitiveMeta instance we evaluate.
        dob_year_tolerance: Allowed +- years around patient_dob.
        include_self: Whether to include `instance` itself in the result.
        use_first_name: Match on patient_first_name.
        use_last_name: Match on patient_last_name.
        use_center: Match on center.
        use_gender: Match on patient_gender.
        use_dob_band: Match on the patient_dob window.

    Returns:
        A Django QuerySet for further aggregation.
    """
    qs = SensitiveMeta.objects.all()

    # --- Names ---
    if use_first_name and instance.patient_first_name is not None:
        qs = qs.filter(patient_first_name=instance.patient_first_name)

    # Previously this branch filtered on the first name a second time, so the
    # last name never took part in the match.
    if use_last_name and instance.patient_last_name is not None:
        qs = qs.filter(patient_last_name=instance.patient_last_name)

    # --- Center ---
    if use_center and instance.center is not None:
        if instance.center.pk is not None:
            qs = qs.filter(center=instance.center.pk)

    # --- Gender ---
    if use_gender and instance.patient_gender is not None:
        if instance.patient_gender.pk is not None:
            # Compare the foreign-key column against the primary key itself.
            qs = qs.filter(patient_gender_id=instance.patient_gender.pk)

    # --- DOB (approximate ±N years using days) ---
    if use_dob_band and instance.patient_dob is not None:
        days = dob_year_tolerance * 365
        ref_date = instance.patient_dob.date()
        start = ref_date - timedelta(days=days)
        end = ref_date + timedelta(days=days)
        qs = qs.filter(patient_dob__date__range=(start, end))

    # --- Exclude self if requested ---
    if not include_self and instance.pk is not None:
        qs = qs.exclude(pk=instance.pk)

    return qs
132
+
133
+
134
def get_k_anonymity_for_sensitive_meta(
    pk: int,
    *,
    k: int = 3,
    dob_year_tolerance: int = 1,
) -> Tuple[int, bool]:
    """
    Compute the k-anonymity (equivalence class size) for a SensitiveMeta record.

    k-anonymity here is defined as the number of SensitiveMeta rows (the
    record itself included) that share the same quasi-identifiers as the
    given record. All switches of _build_sensitive_meta_qi_queryset are left
    at their defaults, so the comparison covers:

    - patient_first_name
    - patient_last_name
    - center
    - patient_gender
    - patient_dob within ±dob_year_tolerance years (approximate)

    Args:
        pk: Primary key of the SensitiveMeta instance to evaluate.
        k: Desired anonymity threshold (e.g. 3 for 3-anonymity).
        dob_year_tolerance: Allowed age window in years around patient_dob.

    Returns:
        (k_value, is_k_anonymous) where:
            k_value = size of the equivalence class
            is_k_anonymous = True if k_value >= k

    Raises:
        ValueError: if no SensitiveMeta with the given pk exists.
    """
    try:
        sm = SensitiveMeta.objects.get(pk=pk)
    except SensitiveMeta.DoesNotExist as exc:
        # Chain the cause so the original lookup failure stays in the traceback.
        raise ValueError(f"SensitiveMeta with pk={pk} does not exist") from exc

    qs = _build_sensitive_meta_qi_queryset(
        sm,
        dob_year_tolerance=dob_year_tolerance,
        include_self=True,
    )

    k_value = qs.count()
    is_k_anon = k_value >= k

    logger.info(
        "k-anonymity for SensitiveMeta pk=%s -> k=%s (threshold=%s, dob_tol=%s years)",
        pk,
        k_value,
        k,
        dob_year_tolerance,
    )

    return k_value, is_k_anon
@@ -0,0 +1,128 @@
1
+ from typing import Optional, Tuple
2
+
3
+ from datetime import date as Date
4
+ import datetime
5
+
6
+ from .k_anonymity import _build_sensitive_meta_qi_queryset
7
+ from .fake import fake_name_with_similar_dob_and_gender
8
+
9
+ from endoreg_db.models import SensitiveMeta
10
+
11
def k_pseudonymize(
    instance: SensitiveMeta,
    *,
    k_threshold: int = 3,
    dob_year_tolerance: int = 3,
    qi_subset: Optional[Tuple[str, ...]] = None,
    locale: str = "de_DE",
    seed: Optional[int] = None,
    save: bool = True,
) -> Tuple[SensitiveMeta, int, bool]:
    """
    Pseudonymize a SensitiveMeta instance when its k-anonymity for the given
    quasi-identifier subset is below `k_threshold`.

    If the instance already reaches the threshold it is returned unchanged.
    Otherwise patient first name, last name and DOB are replaced by generated
    values and k is recomputed. The recomputed k is reported as-is; it is not
    guaranteed to reach the threshold.

    Args:
        instance:
            The SensitiveMeta instance to process.
        k_threshold:
            Minimal k for the chosen QI subset.
        dob_year_tolerance:
            Used both for k-anonymity DOB band and for Faker's DOB perturbation.
        qi_subset:
            Which QIs to use for k-anonymity check.
            Elements from: {"first_name", "last_name", "center", "gender", "dob_band"}.
            Default = all of them.
        locale:
            Faker locale for a realistic name.
        seed:
            Optional seed for reproducibility.
        save:
            If True, save the instance after pseudonymization.

    Returns:
        (instance, k_value_after, is_k_anonymous_after)
    """
    # Function-scope imports: this module neither imported QI_FLAGS nor defined
    # a logger, so both names raised NameError at runtime.
    import logging

    from .k_anonymity import QI_FLAGS

    logger = logging.getLogger(__name__)

    if qi_subset is None:
        qi_subset = tuple(QI_FLAGS)

    # --- 1) Compute k for the requested subset BEFORE pseudonymization ---
    use_first_name = "first_name" in qi_subset
    use_last_name = "last_name" in qi_subset
    use_center = "center" in qi_subset
    use_gender = "gender" in qi_subset
    use_dob_band = "dob_band" in qi_subset

    qs_before = _build_sensitive_meta_qi_queryset(
        instance,
        dob_year_tolerance=dob_year_tolerance,
        include_self=True,
        use_first_name=use_first_name,
        use_last_name=use_last_name,
        use_center=use_center,
        use_gender=use_gender,
        use_dob_band=use_dob_band,
    )
    k_before = qs_before.count()

    if k_before >= k_threshold:
        # Already sufficiently anonymous, nothing to do
        return instance, k_before, True

    # --- 2) Pseudonymize name + DOB using Faker ---
    # The generator only accepts "male"/"female"; a missing or unrecognised
    # gender falls back to "male" instead of raising.
    gender_obj = instance.patient_gender
    raw_gender = getattr(gender_obj, "name", None) if gender_obj else None
    gender_name = str(raw_gender).strip().lower() if raw_gender else ""
    if gender_name not in {"male", "female"}:
        gender_name = "male"

    # Original DOB as date (fallback to today's date if missing)
    if instance.patient_dob is not None:
        orig_dob: Date = instance.patient_dob.date()
    else:
        orig_dob = Date.today()

    first_name, last_name, fake_dob = fake_name_with_similar_dob_and_gender(
        gender=gender_name,
        dob=orig_dob,
        year_tolerance=dob_year_tolerance,
        locale=locale,
        seed=seed,
    )

    # Assign to instance (SensitiveMeta.patient_dob is a DateTimeField).
    instance.patient_first_name = first_name
    instance.patient_last_name = last_name
    # `datetime` is imported as a module in this file, so the class has to be
    # reached through it; calling the module itself raises TypeError.
    instance.patient_dob = datetime.datetime(
        fake_dob.year, fake_dob.month, fake_dob.day
    )  # naive is usually fine for DOB

    if save:
        instance.save(update_fields=["patient_first_name", "patient_last_name", "patient_dob"])

    # --- 3) Recompute k AFTER pseudonymization ---
    qs_after = _build_sensitive_meta_qi_queryset(
        instance,
        dob_year_tolerance=dob_year_tolerance,
        include_self=True,
        use_first_name=use_first_name,
        use_last_name=use_last_name,
        use_center=use_center,
        use_gender=use_gender,
        use_dob_band=use_dob_band,
    )
    k_after = qs_after.count()
    is_k_anon_after = k_after >= k_threshold

    logger.info(
        "k_pseudonymize: SensitiveMeta pk=%s, subset=%s, k_before=%s, k_after=%s, threshold=%s",
        instance.pk,
        qi_subset,
        k_before,
        k_after,
        k_threshold,
    )

    return instance, k_after, is_k_anon_after
@@ -0,0 +1,141 @@
1
+ # endoreg_db/services/report_import_service.py
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Optional, Union
7
+
8
+ from endoreg_db.import_files.context.file_lock import file_lock
9
+ from endoreg_db.import_files.context.import_context import ImportContext
10
+ from endoreg_db.import_files.processing.report_processing.report_anonymization import (
11
+ ReportAnonymizer,
12
+ )
13
+
14
+ from endoreg_db.import_files.file_storage.create_report_file import (
15
+ create_or_retrieve_report_file,
16
+ )
17
+ from endoreg_db.import_files.file_storage.state_management import (
18
+ finalize_report_success,
19
+ finalize_failure,
20
+ mark_instance_processing_started,
21
+ )
22
+ from endoreg_db.import_files.context.validate_directories import validate_directories
23
+
24
+ from endoreg_db.import_files.file_storage.storage import create_sensitive_copy
25
+ from endoreg_db.models.media import RawPdfFile
26
+ from endoreg_db.utils.paths import (
27
+ SENSITIVE_REPORT_DIR,
28
+ )
29
+
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class ReportImportService:
    """
    Service for importing and anonymizing report (PDF) files.

    Responsibilities:
    - Create sensitive copy
    - Acquire file lock
    - Create/reuse RawPdfFile + history
    - Run anonymization (one attempt plus one repeat attempt on failure)
    - Finalize state on success
    - Finalize failure on error
    """

    def __init__(self) -> None:
        self.logger = logger
        self.anonymizer = ReportAnonymizer()
        # Context and report of the most recent import_and_anonymize() call.
        self.processing_context: Optional[ImportContext] = None
        self.current_report: Optional[RawPdfFile] = None

        validate_directories()

    def import_and_anonymize(
        self,
        file_path: Union[Path, str],
        center_name: str,
        retry: bool = False,
        delete_source: bool = True,
    ) -> "RawPdfFile | None":
        """
        Public entrypoint: import a report file and anonymize it.

        Args:
            file_path: Location of the report file to import.
            center_name: Center name stored in the import context.
            retry: Force reprocessing. A report that still needs processing and
                whose anonymization is not validated is reset via
                finalize_failure first.
            delete_source: Forwarded to the import context.

        Returns:
            The report instance held by the import context. When the report
            needs no processing and ``retry`` is False it is returned right
            away without running the anonymization.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
            Exception: any anonymization/finalization error, re-raised after
                finalize_failure has been called.
        """
        # The import context is updated during the import and keeps track of
        # current paths, file type, center and processor.
        ctx = ImportContext(
            file_path=Path(file_path),
            center_name=center_name,
            delete_source=delete_source,
            file_type="report",
        )
        self.processing_context = ctx

        self.logger.info("validating and preparing file")
        if not ctx.file_path.exists():
            # This service handles reports; the message used to say "Video file".
            raise FileNotFoundError(f"Report file not found: {file_path}")

        ctx.sensitive_path = create_sensitive_copy(
            ctx.file_path,
            SENSITIVE_REPORT_DIR,
        )

        with file_lock(ctx.file_path):
            logger.info("Acquired file lock for %s", ctx.file_path)

            # create or retrieve RawPdfFile + update history
            ctx.current_report, needs_processing = create_or_retrieve_report_file(ctx)
            ctx.current_report.get_or_create_state()
            assert ctx.current_report.state is not None
            # Previously a no-op self-assignment on ctx; expose the report on
            # the service as the attribute declared in __init__ implies.
            self.current_report = ctx.current_report

            ctx.retry = retry
            # Retry is a forced overwrite of needs_processing - therefore the
            # retry will cause full deletion of processed files using
            # finalize_failure.
            if retry and needs_processing and not ctx.current_report.state.anonymization_validated:
                # ensure clean slate for forced reprocessing
                finalize_failure(ctx)
                ctx.current_report, needs_processing = create_or_retrieve_report_file(ctx)
                assert needs_processing is True
                self.current_report = ctx.current_report
            elif not needs_processing and not retry:
                return ctx.current_report

            mark_instance_processing_started(ctx.current_report, ctx)
            try:
                # --- Anonymization with one repeat attempt ---
                try:
                    ctx = self.anonymizer.anonymize_report(ctx)
                    logger.info(
                        "Primary report anonymization succeeded for %s",
                        ctx.file_path,
                    )
                except Exception as primary_exc:
                    logger.exception(
                        "Primary report anonymization failed for %s: %s "
                        "- trying basic anonymization",
                        ctx.file_path,
                        primary_exc,
                    )
                    # NOTE(review): the second attempt calls the very same
                    # anonymize_report(); no separate "basic" pipeline is
                    # visible here - confirm this is the intended fallback.
                    try:
                        ctx = self.anonymizer.anonymize_report(ctx)
                    except Exception as e:
                        logger.error("PDF Extraction failed for the second time. %s", e)
                        raise

                    logger.info(
                        "Basic report anonymization succeeded for %s",
                        ctx.file_path,
                    )

                # --- Finalize success: history + move anonymized file ---
                finalize_report_success(ctx)
                self.processing_context = ctx

                return ctx.current_report

            except Exception as exc:
                logger.exception(
                    "Report import/anonymization failed for %s: %s", ctx.file_path, exc
                )
                # mark failure in history
                finalize_failure(ctx)
                raise
140
+
141
+