endoreg-db 0.8.9.2__py3-none-any.whl → 0.8.9.10__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (450)
  1. endoreg_db/admin.py +10 -5
  2. endoreg_db/apps.py +4 -7
  3. endoreg_db/authz/auth.py +1 -0
  4. endoreg_db/authz/backends.py +1 -1
  5. endoreg_db/authz/management/commands/list_routes.py +2 -0
  6. endoreg_db/authz/middleware.py +8 -7
  7. endoreg_db/authz/permissions.py +21 -10
  8. endoreg_db/authz/policy.py +14 -19
  9. endoreg_db/authz/views_auth.py +14 -10
  10. endoreg_db/codemods/rename_datetime_fields.py +8 -1
  11. endoreg_db/exceptions.py +5 -2
  12. endoreg_db/forms/__init__.py +0 -1
  13. endoreg_db/forms/examination_form.py +4 -3
  14. endoreg_db/forms/patient_finding_intervention_form.py +30 -8
  15. endoreg_db/forms/patient_form.py +9 -13
  16. endoreg_db/forms/questionnaires/__init__.py +1 -1
  17. endoreg_db/forms/settings/__init__.py +4 -1
  18. endoreg_db/forms/unit.py +2 -1
  19. endoreg_db/helpers/count_db.py +17 -14
  20. endoreg_db/helpers/default_objects.py +2 -1
  21. endoreg_db/helpers/download_segmentation_model.py +4 -3
  22. endoreg_db/helpers/interact.py +0 -5
  23. endoreg_db/helpers/test_video_helper.py +33 -25
  24. endoreg_db/import_files/__init__.py +1 -1
  25. endoreg_db/import_files/context/__init__.py +1 -1
  26. endoreg_db/import_files/context/default_sensitive_meta.py +11 -9
  27. endoreg_db/import_files/context/ensure_center.py +4 -4
  28. endoreg_db/import_files/context/file_lock.py +3 -3
  29. endoreg_db/import_files/context/import_context.py +11 -12
  30. endoreg_db/import_files/context/validate_directories.py +1 -0
  31. endoreg_db/import_files/file_storage/create_report_file.py +57 -34
  32. endoreg_db/import_files/file_storage/create_video_file.py +64 -35
  33. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +5 -2
  34. endoreg_db/import_files/file_storage/state_management.py +89 -122
  35. endoreg_db/import_files/file_storage/storage.py +5 -1
  36. endoreg_db/import_files/processing/report_processing/report_anonymization.py +24 -19
  37. endoreg_db/import_files/processing/sensitive_meta_adapter.py +3 -3
  38. endoreg_db/import_files/processing/video_processing/video_anonymization.py +18 -18
  39. endoreg_db/import_files/pseudonymization/k_anonymity.py +8 -9
  40. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +16 -5
  41. endoreg_db/import_files/report_import_service.py +36 -30
  42. endoreg_db/import_files/video_import_service.py +27 -23
  43. endoreg_db/logger_conf.py +56 -40
  44. endoreg_db/management/__init__.py +1 -1
  45. endoreg_db/management/commands/__init__.py +1 -1
  46. endoreg_db/management/commands/check_auth.py +45 -38
  47. endoreg_db/management/commands/create_model_meta_from_huggingface.py +53 -2
  48. endoreg_db/management/commands/create_multilabel_model_meta.py +54 -19
  49. endoreg_db/management/commands/fix_missing_patient_data.py +105 -71
  50. endoreg_db/management/commands/fix_video_paths.py +75 -54
  51. endoreg_db/management/commands/import_report.py +1 -3
  52. endoreg_db/management/commands/list_routes.py +2 -0
  53. endoreg_db/management/commands/load_ai_model_data.py +8 -2
  54. endoreg_db/management/commands/load_ai_model_label_data.py +0 -1
  55. endoreg_db/management/commands/load_center_data.py +3 -3
  56. endoreg_db/management/commands/load_distribution_data.py +35 -38
  57. endoreg_db/management/commands/load_endoscope_data.py +0 -3
  58. endoreg_db/management/commands/load_examination_data.py +20 -4
  59. endoreg_db/management/commands/load_finding_data.py +18 -3
  60. endoreg_db/management/commands/load_gender_data.py +17 -24
  61. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +95 -85
  62. endoreg_db/management/commands/load_information_source.py +0 -3
  63. endoreg_db/management/commands/load_lab_value_data.py +14 -3
  64. endoreg_db/management/commands/load_legacy_data.py +303 -0
  65. endoreg_db/management/commands/load_name_data.py +1 -2
  66. endoreg_db/management/commands/load_pdf_type_data.py +4 -8
  67. endoreg_db/management/commands/load_profession_data.py +0 -1
  68. endoreg_db/management/commands/load_report_reader_flag_data.py +0 -4
  69. endoreg_db/management/commands/load_requirement_data.py +6 -2
  70. endoreg_db/management/commands/load_unit_data.py +0 -4
  71. endoreg_db/management/commands/load_user_groups.py +5 -7
  72. endoreg_db/management/commands/model_input.py +169 -0
  73. endoreg_db/management/commands/register_ai_model.py +22 -16
  74. endoreg_db/management/commands/setup_endoreg_db.py +110 -32
  75. endoreg_db/management/commands/storage_management.py +14 -8
  76. endoreg_db/management/commands/summarize_db_content.py +154 -63
  77. endoreg_db/management/commands/train_image_multilabel_model.py +144 -0
  78. endoreg_db/management/commands/validate_video_files.py +82 -50
  79. endoreg_db/management/commands/video_validation.py +4 -6
  80. endoreg_db/migrations/0001_initial.py +112 -63
  81. endoreg_db/models/__init__.py +8 -0
  82. endoreg_db/models/administration/ai/active_model.py +5 -5
  83. endoreg_db/models/administration/ai/ai_model.py +41 -18
  84. endoreg_db/models/administration/ai/model_type.py +1 -0
  85. endoreg_db/models/administration/case/case.py +22 -22
  86. endoreg_db/models/administration/center/__init__.py +5 -5
  87. endoreg_db/models/administration/center/center.py +6 -2
  88. endoreg_db/models/administration/center/center_resource.py +18 -4
  89. endoreg_db/models/administration/center/center_shift.py +3 -1
  90. endoreg_db/models/administration/center/center_waste.py +6 -2
  91. endoreg_db/models/administration/person/__init__.py +1 -1
  92. endoreg_db/models/administration/person/employee/__init__.py +1 -1
  93. endoreg_db/models/administration/person/employee/employee_type.py +3 -1
  94. endoreg_db/models/administration/person/examiner/__init__.py +1 -1
  95. endoreg_db/models/administration/person/examiner/examiner.py +10 -2
  96. endoreg_db/models/administration/person/names/first_name.py +6 -4
  97. endoreg_db/models/administration/person/names/last_name.py +4 -3
  98. endoreg_db/models/administration/person/patient/__init__.py +1 -1
  99. endoreg_db/models/administration/person/patient/patient.py +0 -1
  100. endoreg_db/models/administration/person/patient/patient_external_id.py +0 -1
  101. endoreg_db/models/administration/person/person.py +1 -1
  102. endoreg_db/models/administration/product/__init__.py +7 -6
  103. endoreg_db/models/administration/product/product.py +6 -2
  104. endoreg_db/models/administration/product/product_group.py +9 -7
  105. endoreg_db/models/administration/product/product_material.py +9 -2
  106. endoreg_db/models/administration/product/reference_product.py +64 -15
  107. endoreg_db/models/administration/qualification/qualification.py +3 -1
  108. endoreg_db/models/administration/shift/shift.py +3 -1
  109. endoreg_db/models/administration/shift/shift_type.py +12 -4
  110. endoreg_db/models/aidataset/__init__.py +5 -0
  111. endoreg_db/models/aidataset/aidataset.py +193 -0
  112. endoreg_db/models/label/__init__.py +1 -1
  113. endoreg_db/models/label/label.py +10 -2
  114. endoreg_db/models/label/label_set.py +3 -1
  115. endoreg_db/models/label/label_video_segment/_create_from_video.py +6 -2
  116. endoreg_db/models/label/label_video_segment/label_video_segment.py +148 -44
  117. endoreg_db/models/media/__init__.py +12 -5
  118. endoreg_db/models/media/frame/__init__.py +1 -1
  119. endoreg_db/models/media/frame/frame.py +34 -8
  120. endoreg_db/models/media/pdf/__init__.py +2 -1
  121. endoreg_db/models/media/pdf/raw_pdf.py +11 -4
  122. endoreg_db/models/media/pdf/report_file.py +6 -2
  123. endoreg_db/models/media/pdf/report_reader/__init__.py +3 -3
  124. endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +15 -5
  125. endoreg_db/models/media/video/create_from_file.py +20 -41
  126. endoreg_db/models/media/video/pipe_1.py +75 -30
  127. endoreg_db/models/media/video/pipe_2.py +37 -12
  128. endoreg_db/models/media/video/video_file.py +36 -24
  129. endoreg_db/models/media/video/video_file_ai.py +235 -70
  130. endoreg_db/models/media/video/video_file_anonymize.py +240 -65
  131. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -1
  132. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +3 -1
  133. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +30 -9
  134. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +95 -29
  135. endoreg_db/models/media/video/video_file_frames/_get_frame.py +13 -3
  136. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -1
  137. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +15 -3
  138. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +15 -3
  139. endoreg_db/models/media/video/video_file_frames/_get_frames.py +7 -2
  140. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +109 -23
  141. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +111 -27
  142. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +46 -13
  143. endoreg_db/models/media/video/video_file_io.py +85 -33
  144. endoreg_db/models/media/video/video_file_meta/__init__.py +6 -6
  145. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +17 -4
  146. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +28 -7
  147. endoreg_db/models/media/video/video_file_meta/get_fps.py +46 -13
  148. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +81 -20
  149. endoreg_db/models/media/video/video_file_meta/text_meta.py +61 -20
  150. endoreg_db/models/media/video/video_file_meta/video_meta.py +40 -12
  151. endoreg_db/models/media/video/video_file_segments.py +118 -27
  152. endoreg_db/models/media/video/video_metadata.py +25 -6
  153. endoreg_db/models/media/video/video_processing.py +54 -15
  154. endoreg_db/models/medical/__init__.py +3 -13
  155. endoreg_db/models/medical/contraindication/__init__.py +3 -1
  156. endoreg_db/models/medical/disease.py +18 -6
  157. endoreg_db/models/medical/event.py +6 -2
  158. endoreg_db/models/medical/examination/__init__.py +5 -1
  159. endoreg_db/models/medical/examination/examination.py +22 -6
  160. endoreg_db/models/medical/examination/examination_indication.py +23 -7
  161. endoreg_db/models/medical/examination/examination_time.py +6 -2
  162. endoreg_db/models/medical/finding/__init__.py +3 -1
  163. endoreg_db/models/medical/finding/finding.py +37 -12
  164. endoreg_db/models/medical/finding/finding_classification.py +27 -8
  165. endoreg_db/models/medical/finding/finding_intervention.py +19 -6
  166. endoreg_db/models/medical/finding/finding_type.py +3 -1
  167. endoreg_db/models/medical/hardware/__init__.py +1 -1
  168. endoreg_db/models/medical/hardware/endoscope.py +14 -2
  169. endoreg_db/models/medical/laboratory/__init__.py +1 -1
  170. endoreg_db/models/medical/laboratory/lab_value.py +139 -39
  171. endoreg_db/models/medical/medication/__init__.py +7 -3
  172. endoreg_db/models/medical/medication/medication.py +3 -1
  173. endoreg_db/models/medical/medication/medication_indication.py +3 -1
  174. endoreg_db/models/medical/medication/medication_indication_type.py +11 -3
  175. endoreg_db/models/medical/medication/medication_intake_time.py +3 -1
  176. endoreg_db/models/medical/medication/medication_schedule.py +3 -1
  177. endoreg_db/models/medical/patient/__init__.py +2 -10
  178. endoreg_db/models/medical/patient/medication_examples.py +3 -14
  179. endoreg_db/models/medical/patient/patient_disease.py +17 -5
  180. endoreg_db/models/medical/patient/patient_event.py +12 -4
  181. endoreg_db/models/medical/patient/patient_examination.py +52 -15
  182. endoreg_db/models/medical/patient/patient_examination_indication.py +15 -4
  183. endoreg_db/models/medical/patient/patient_finding.py +105 -29
  184. endoreg_db/models/medical/patient/patient_finding_classification.py +41 -12
  185. endoreg_db/models/medical/patient/patient_finding_intervention.py +11 -3
  186. endoreg_db/models/medical/patient/patient_lab_sample.py +6 -2
  187. endoreg_db/models/medical/patient/patient_lab_value.py +42 -10
  188. endoreg_db/models/medical/patient/patient_medication.py +25 -7
  189. endoreg_db/models/medical/patient/patient_medication_schedule.py +34 -10
  190. endoreg_db/models/metadata/model_meta.py +40 -12
  191. endoreg_db/models/metadata/model_meta_logic.py +51 -16
  192. endoreg_db/models/metadata/sensitive_meta.py +65 -28
  193. endoreg_db/models/metadata/sensitive_meta_logic.py +28 -26
  194. endoreg_db/models/metadata/video_meta.py +146 -39
  195. endoreg_db/models/metadata/video_prediction_logic.py +70 -21
  196. endoreg_db/models/metadata/video_prediction_meta.py +80 -27
  197. endoreg_db/models/operation_log.py +63 -0
  198. endoreg_db/models/other/__init__.py +10 -10
  199. endoreg_db/models/other/distribution/__init__.py +9 -7
  200. endoreg_db/models/other/distribution/base_value_distribution.py +3 -1
  201. endoreg_db/models/other/distribution/date_value_distribution.py +19 -5
  202. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +3 -1
  203. endoreg_db/models/other/distribution/numeric_value_distribution.py +34 -9
  204. endoreg_db/models/other/emission/__init__.py +1 -1
  205. endoreg_db/models/other/emission/emission_factor.py +9 -3
  206. endoreg_db/models/other/information_source.py +15 -5
  207. endoreg_db/models/other/material.py +3 -1
  208. endoreg_db/models/other/transport_route.py +3 -1
  209. endoreg_db/models/other/unit.py +6 -2
  210. endoreg_db/models/report/report.py +0 -1
  211. endoreg_db/models/requirement/requirement.py +84 -27
  212. endoreg_db/models/requirement/requirement_error.py +5 -6
  213. endoreg_db/models/requirement/requirement_evaluation/__init__.py +1 -1
  214. endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +8 -8
  215. endoreg_db/models/requirement/requirement_evaluation/get_values.py +3 -3
  216. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +24 -8
  217. endoreg_db/models/requirement/requirement_operator.py +28 -8
  218. endoreg_db/models/requirement/requirement_set.py +34 -11
  219. endoreg_db/models/state/__init__.py +1 -0
  220. endoreg_db/models/state/audit_ledger.py +9 -2
  221. endoreg_db/models/{media → state}/processing_history/__init__.py +1 -3
  222. endoreg_db/models/state/processing_history/processing_history.py +136 -0
  223. endoreg_db/models/state/raw_pdf.py +0 -1
  224. endoreg_db/models/state/video.py +2 -4
  225. endoreg_db/models/utils.py +4 -2
  226. endoreg_db/queries/__init__.py +2 -6
  227. endoreg_db/queries/annotations/__init__.py +1 -3
  228. endoreg_db/queries/annotations/legacy.py +37 -26
  229. endoreg_db/root_urls.py +3 -4
  230. endoreg_db/schemas/examination_evaluation.py +3 -0
  231. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +249 -163
  232. endoreg_db/serializers/__init__.py +2 -8
  233. endoreg_db/serializers/administration/__init__.py +1 -2
  234. endoreg_db/serializers/administration/ai/__init__.py +0 -1
  235. endoreg_db/serializers/administration/ai/active_model.py +3 -1
  236. endoreg_db/serializers/administration/ai/ai_model.py +5 -3
  237. endoreg_db/serializers/administration/ai/model_type.py +3 -1
  238. endoreg_db/serializers/administration/center.py +7 -2
  239. endoreg_db/serializers/administration/gender.py +4 -2
  240. endoreg_db/serializers/anonymization.py +13 -13
  241. endoreg_db/serializers/evaluation/examination_evaluation.py +0 -1
  242. endoreg_db/serializers/examination/__init__.py +1 -1
  243. endoreg_db/serializers/examination/base.py +12 -13
  244. endoreg_db/serializers/examination/dropdown.py +6 -7
  245. endoreg_db/serializers/examination_serializer.py +3 -6
  246. endoreg_db/serializers/finding/__init__.py +1 -1
  247. endoreg_db/serializers/finding/finding.py +14 -7
  248. endoreg_db/serializers/finding_classification/__init__.py +3 -3
  249. endoreg_db/serializers/finding_classification/choice.py +3 -3
  250. endoreg_db/serializers/finding_classification/classification.py +2 -4
  251. endoreg_db/serializers/label_video_segment/__init__.py +5 -3
  252. endoreg_db/serializers/{label → label_video_segment}/image_classification_annotation.py +5 -5
  253. endoreg_db/serializers/label_video_segment/label/__init__.py +6 -0
  254. endoreg_db/serializers/{label → label_video_segment/label}/label.py +1 -1
  255. endoreg_db/serializers/label_video_segment/label_video_segment.py +338 -228
  256. endoreg_db/serializers/meta/__init__.py +1 -2
  257. endoreg_db/serializers/meta/sensitive_meta_detail.py +28 -13
  258. endoreg_db/serializers/meta/sensitive_meta_update.py +51 -46
  259. endoreg_db/serializers/meta/sensitive_meta_verification.py +19 -16
  260. endoreg_db/serializers/misc/__init__.py +2 -2
  261. endoreg_db/serializers/misc/file_overview.py +11 -7
  262. endoreg_db/serializers/misc/stats.py +10 -8
  263. endoreg_db/serializers/misc/translatable_field_mix_in.py +6 -6
  264. endoreg_db/serializers/misc/upload_job.py +32 -29
  265. endoreg_db/serializers/patient/__init__.py +2 -1
  266. endoreg_db/serializers/patient/patient.py +32 -15
  267. endoreg_db/serializers/patient/patient_dropdown.py +11 -3
  268. endoreg_db/serializers/patient_examination/__init__.py +1 -1
  269. endoreg_db/serializers/patient_examination/patient_examination.py +67 -40
  270. endoreg_db/serializers/patient_finding/__init__.py +1 -1
  271. endoreg_db/serializers/patient_finding/patient_finding.py +2 -1
  272. endoreg_db/serializers/patient_finding/patient_finding_classification.py +17 -9
  273. endoreg_db/serializers/patient_finding/patient_finding_detail.py +26 -17
  274. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +7 -5
  275. endoreg_db/serializers/patient_finding/patient_finding_list.py +10 -11
  276. endoreg_db/serializers/patient_finding/patient_finding_write.py +36 -27
  277. endoreg_db/serializers/pdf/__init__.py +1 -3
  278. endoreg_db/serializers/requirements/requirement_schema.py +1 -6
  279. endoreg_db/serializers/sensitive_meta_serializer.py +100 -81
  280. endoreg_db/serializers/video/__init__.py +2 -2
  281. endoreg_db/serializers/video/{segmentation.py → video_file.py} +66 -47
  282. endoreg_db/serializers/video/video_file_brief.py +6 -2
  283. endoreg_db/serializers/video/video_file_detail.py +36 -23
  284. endoreg_db/serializers/video/video_file_list.py +4 -2
  285. endoreg_db/serializers/video/video_processing_history.py +54 -50
  286. endoreg_db/services/__init__.py +1 -1
  287. endoreg_db/services/anonymization.py +2 -2
  288. endoreg_db/services/examination_evaluation.py +40 -17
  289. endoreg_db/services/model_meta_from_hf.py +76 -0
  290. endoreg_db/services/polling_coordinator.py +101 -70
  291. endoreg_db/services/pseudonym_service.py +27 -22
  292. endoreg_db/services/report_import.py +6 -3
  293. endoreg_db/services/segment_sync.py +75 -59
  294. endoreg_db/services/video_import.py +6 -7
  295. endoreg_db/urls/__init__.py +2 -2
  296. endoreg_db/urls/ai.py +7 -25
  297. endoreg_db/urls/anonymization.py +61 -15
  298. endoreg_db/urls/auth.py +4 -4
  299. endoreg_db/urls/classification.py +4 -9
  300. endoreg_db/urls/examination.py +27 -18
  301. endoreg_db/urls/media.py +27 -34
  302. endoreg_db/urls/patient.py +11 -7
  303. endoreg_db/urls/requirements.py +3 -1
  304. endoreg_db/urls/root_urls.py +2 -3
  305. endoreg_db/urls/stats.py +24 -16
  306. endoreg_db/urls/upload.py +3 -11
  307. endoreg_db/utils/__init__.py +14 -15
  308. endoreg_db/utils/ai/__init__.py +1 -1
  309. endoreg_db/utils/ai/data_loader_for_model_input.py +262 -0
  310. endoreg_db/utils/ai/data_loader_for_model_training.py +262 -0
  311. endoreg_db/utils/ai/get.py +2 -1
  312. endoreg_db/utils/ai/inference_dataset.py +14 -15
  313. endoreg_db/utils/ai/model_training/config.py +117 -0
  314. endoreg_db/utils/ai/model_training/dataset.py +74 -0
  315. endoreg_db/utils/ai/model_training/losses.py +68 -0
  316. endoreg_db/utils/ai/model_training/metrics.py +78 -0
  317. endoreg_db/utils/ai/model_training/model_backbones.py +155 -0
  318. endoreg_db/utils/ai/model_training/model_gastronet_resnet.py +118 -0
  319. endoreg_db/utils/ai/model_training/trainer_gastronet_multilabel.py +771 -0
  320. endoreg_db/utils/ai/multilabel_classification_net.py +21 -6
  321. endoreg_db/utils/ai/predict.py +4 -4
  322. endoreg_db/utils/ai/preprocess.py +19 -11
  323. endoreg_db/utils/calc_duration_seconds.py +4 -4
  324. endoreg_db/utils/case_generator/lab_sample_factory.py +3 -4
  325. endoreg_db/utils/check_video_files.py +74 -47
  326. endoreg_db/utils/cropping.py +10 -9
  327. endoreg_db/utils/dataloader.py +11 -3
  328. endoreg_db/utils/dates.py +3 -4
  329. endoreg_db/utils/defaults/set_default_center.py +7 -6
  330. endoreg_db/utils/env.py +6 -2
  331. endoreg_db/utils/extract_specific_frames.py +24 -9
  332. endoreg_db/utils/file_operations.py +30 -18
  333. endoreg_db/utils/fix_video_path_direct.py +57 -41
  334. endoreg_db/utils/frame_anonymization_utils.py +157 -157
  335. endoreg_db/utils/hashs.py +3 -18
  336. endoreg_db/utils/links/requirement_link.py +96 -52
  337. endoreg_db/utils/ocr.py +30 -25
  338. endoreg_db/utils/operation_log.py +61 -0
  339. endoreg_db/utils/parse_and_generate_yaml.py +12 -13
  340. endoreg_db/utils/paths.py +6 -6
  341. endoreg_db/utils/permissions.py +40 -24
  342. endoreg_db/utils/pipelines/process_video_dir.py +50 -26
  343. endoreg_db/utils/product/sum_emissions.py +5 -3
  344. endoreg_db/utils/product/sum_weights.py +4 -2
  345. endoreg_db/utils/pydantic_models/__init__.py +3 -4
  346. endoreg_db/utils/requirement_operator_logic/_old/lab_value_operators.py +207 -107
  347. endoreg_db/utils/requirement_operator_logic/_old/model_evaluators.py +252 -65
  348. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +27 -10
  349. endoreg_db/utils/setup_config.py +21 -5
  350. endoreg_db/utils/storage.py +3 -1
  351. endoreg_db/utils/translation.py +19 -15
  352. endoreg_db/utils/uuid.py +1 -0
  353. endoreg_db/utils/validate_endo_roi.py +12 -4
  354. endoreg_db/utils/validate_subcategory_dict.py +26 -24
  355. endoreg_db/utils/validate_video_detailed.py +207 -149
  356. endoreg_db/utils/video/__init__.py +7 -3
  357. endoreg_db/utils/video/extract_frames.py +30 -18
  358. endoreg_db/utils/video/names.py +11 -6
  359. endoreg_db/utils/video/streaming_processor.py +175 -101
  360. endoreg_db/utils/video/video_splitter.py +30 -19
  361. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +59 -50
  362. endoreg_db/views/__init__.py +0 -20
  363. endoreg_db/views/anonymization/__init__.py +6 -2
  364. endoreg_db/views/anonymization/media_management.py +2 -6
  365. endoreg_db/views/anonymization/overview.py +34 -1
  366. endoreg_db/views/anonymization/validate.py +79 -18
  367. endoreg_db/views/auth/__init__.py +1 -1
  368. endoreg_db/views/auth/keycloak.py +16 -14
  369. endoreg_db/views/examination/__init__.py +12 -15
  370. endoreg_db/views/examination/examination.py +5 -5
  371. endoreg_db/views/examination/examination_manifest_cache.py +5 -5
  372. endoreg_db/views/examination/get_finding_classification_choices.py +8 -5
  373. endoreg_db/views/examination/get_finding_classifications.py +9 -7
  374. endoreg_db/views/examination/get_findings.py +8 -10
  375. endoreg_db/views/examination/get_instruments.py +3 -2
  376. endoreg_db/views/examination/get_interventions.py +1 -1
  377. endoreg_db/views/finding/__init__.py +2 -2
  378. endoreg_db/views/finding/finding.py +58 -54
  379. endoreg_db/views/finding/get_classifications.py +1 -1
  380. endoreg_db/views/finding/get_interventions.py +1 -1
  381. endoreg_db/views/finding_classification/__init__.py +5 -5
  382. endoreg_db/views/finding_classification/finding_classification.py +5 -6
  383. endoreg_db/views/finding_classification/get_classification_choices.py +3 -4
  384. endoreg_db/views/media/__init__.py +13 -13
  385. endoreg_db/views/media/pdf_media.py +9 -9
  386. endoreg_db/views/media/sensitive_metadata.py +10 -7
  387. endoreg_db/views/media/video_media.py +4 -4
  388. endoreg_db/views/meta/__init__.py +1 -1
  389. endoreg_db/views/meta/sensitive_meta_list.py +20 -22
  390. endoreg_db/views/meta/sensitive_meta_verification.py +14 -11
  391. endoreg_db/views/misc/__init__.py +6 -34
  392. endoreg_db/views/misc/center.py +2 -1
  393. endoreg_db/views/misc/csrf.py +2 -1
  394. endoreg_db/views/misc/gender.py +2 -1
  395. endoreg_db/views/misc/stats.py +141 -106
  396. endoreg_db/views/patient/__init__.py +1 -3
  397. endoreg_db/views/patient/patient.py +141 -99
  398. endoreg_db/views/patient_examination/__init__.py +5 -5
  399. endoreg_db/views/patient_examination/patient_examination.py +43 -42
  400. endoreg_db/views/patient_examination/patient_examination_create.py +10 -15
  401. endoreg_db/views/patient_examination/patient_examination_detail.py +12 -15
  402. endoreg_db/views/patient_examination/patient_examination_list.py +21 -17
  403. endoreg_db/views/patient_examination/video.py +114 -80
  404. endoreg_db/views/patient_finding/__init__.py +1 -1
  405. endoreg_db/views/patient_finding/patient_finding.py +17 -10
  406. endoreg_db/views/patient_finding/patient_finding_optimized.py +127 -95
  407. endoreg_db/views/patient_finding_classification/__init__.py +1 -1
  408. endoreg_db/views/patient_finding_classification/pfc_create.py +35 -27
  409. endoreg_db/views/report/reimport.py +1 -1
  410. endoreg_db/views/report/report_stream.py +5 -8
  411. endoreg_db/views/requirement/__init__.py +2 -1
  412. endoreg_db/views/requirement/evaluate.py +7 -9
  413. endoreg_db/views/requirement/lookup.py +2 -3
  414. endoreg_db/views/requirement/lookup_store.py +0 -1
  415. endoreg_db/views/requirement/requirement_utils.py +2 -4
  416. endoreg_db/views/stats/__init__.py +4 -4
  417. endoreg_db/views/stats/stats_views.py +152 -115
  418. endoreg_db/views/video/__init__.py +18 -27
  419. endoreg_db/views/{ai → video/ai}/__init__.py +2 -2
  420. endoreg_db/views/{ai → video/ai}/label.py +20 -16
  421. endoreg_db/views/video/correction.py +5 -6
  422. endoreg_db/views/video/reimport.py +134 -99
  423. endoreg_db/views/video/segments_crud.py +134 -44
  424. endoreg_db/views/video/video_apply_mask.py +13 -12
  425. endoreg_db/views/video/video_correction.py +2 -1
  426. endoreg_db/views/video/video_download_processed.py +15 -15
  427. endoreg_db/views/video/video_meta_stats.py +7 -6
  428. endoreg_db/views/video/video_processing_history.py +3 -2
  429. endoreg_db/views/video/video_remove_frames.py +13 -12
  430. endoreg_db/views/video/video_stream.py +110 -82
  431. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/METADATA +9 -3
  432. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/RECORD +434 -431
  433. endoreg_db/management/commands/import_fallback_video.py +0 -203
  434. endoreg_db/management/commands/import_video.py +0 -422
  435. endoreg_db/management/commands/import_video_with_classification.py +0 -367
  436. endoreg_db/models/media/processing_history/processing_history.py +0 -96
  437. endoreg_db/serializers/label/__init__.py +0 -7
  438. endoreg_db/serializers/label_video_segment/_lvs_create.py +0 -149
  439. endoreg_db/serializers/label_video_segment/_lvs_update.py +0 -138
  440. endoreg_db/serializers/label_video_segment/_lvs_validate.py +0 -149
  441. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +0 -99
  442. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +0 -163
  443. endoreg_db/services/__old/pdf_import.py +0 -1487
  444. endoreg_db/services/__old/video_import.py +0 -1306
  445. endoreg_db/tasks/upload_tasks.py +0 -216
  446. endoreg_db/tasks/video_ingest.py +0 -161
  447. endoreg_db/tasks/video_processing_tasks.py +0 -327
  448. endoreg_db/views/misc/translation.py +0 -182
  449. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/WHEEL +0 -0
  450. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,11 @@
1
1
  import huggingface_hub
2
2
  from typing import Optional
3
3
 
4
+
4
5
  def download_segmentation_model(
5
6
  repo_id: str = "wg-lux/colo_segmentation_RegNetX800MF_base",
6
7
  filename: str = "model.safetensors",
7
- cache_dir: Optional[str] = None
8
+ cache_dir: Optional[str] = None,
8
9
  ) -> str:
9
10
  """
10
11
  Downloads a segmentation model from Hugging Face and caches it locally.
@@ -16,7 +17,7 @@ def download_segmentation_model(
16
17
 
17
18
  Returns:
18
19
  str: The local path to the downloaded model.
19
-
20
+
20
21
  Example:
21
22
  >>> model_path = download_segmentation_model()
22
23
  >>> # Downloads from wg-lux/colo_segmentation_RegNetX800MF_base
@@ -28,4 +29,4 @@ def download_segmentation_model(
28
29
  force_download=False,
29
30
  resume_download=True,
30
31
  )
31
- return local_path
32
+ return local_path
@@ -1,6 +1 @@
1
- from endoreg_db.models import EndoscopyProcessor
2
- from django.core.management.base import BaseCommand
3
- from typing import TYPE_CHECKING
4
- from django.db.models import QuerySet
5
-
6
1
 
@@ -4,17 +4,16 @@ import re
4
4
  from typing import Optional
5
5
 
6
6
  from logging import getLogger
7
+
7
8
  logger = getLogger(__name__)
8
9
 
9
10
 
10
- from endoreg_db.utils.video.names import (
11
- get_video_key_regex_by_examination_alias
12
- )
11
+ from endoreg_db.utils.video.names import get_video_key_regex_by_examination_alias
13
12
 
14
13
  from django.conf import settings
15
14
 
16
15
 
17
- ASSET_DIR:Path = settings.ASSET_DIR
16
+ ASSET_DIR: Path = settings.ASSET_DIR
18
17
  assert ASSET_DIR.exists(), f"ASSET_DIR does not exist: {ASSET_DIR}"
19
18
 
20
19
  _TEST_VIDEOS = {
@@ -29,10 +28,10 @@ _TEST_VIDEOS = {
29
28
  TEST_VIDEOS = {key: value for key, value in _TEST_VIDEOS.items() if value.exists()}
30
29
 
31
30
 
32
- def get_video_path(video_key:str) -> Path:
31
+ def get_video_path(video_key: str) -> Path:
33
32
  """
34
33
  Returns the file path associated with the given video key.
35
-
34
+
36
35
  Raises:
37
36
  ValueError: If the video key does not exist in the available test videos.
38
37
  """
@@ -41,19 +40,22 @@ def get_video_path(video_key:str) -> Path:
41
40
  else:
42
41
  raise ValueError(f"Video key '{video_key}' not found in TEST_VIDEOS.")
43
42
 
43
+
44
44
  def get_video_keys(
45
- examination_alias:Optional[str]=None, content:Optional[str]=None, is_anonymous:Optional[bool]=None
45
+ examination_alias: Optional[str] = None,
46
+ content: Optional[str] = None,
47
+ is_anonymous: Optional[bool] = None,
46
48
  ):
47
49
  """
48
50
  Returns a list of video keys matching the specified examination alias, content type, and anonymity status.
49
-
51
+
50
52
  If no direct matches are found, falls back to suffix-based filtering for anonymity. Logs warnings and errors when fallback logic is used or no matches are found.
51
-
53
+
52
54
  Args:
53
55
  examination_alias: The examination alias to filter by, or None for any.
54
56
  content: The content type to filter by, or None for any.
55
57
  is_anonymous: Whether to filter for anonymous, non-anonymous, or both.
56
-
58
+
57
59
  Returns:
58
60
  A list of matching video keys.
59
61
  """
@@ -62,49 +64,57 @@ def get_video_keys(
62
64
  pattern_parts = ["^"]
63
65
  if examination_alias:
64
66
  pattern_parts.append(re.escape(examination_alias))
65
- pattern_parts.append("-.*-") # Match any content
67
+ pattern_parts.append("-.*-") # Match any content
66
68
  else:
67
- pattern_parts.append(".*-") # Match any examination alias and content
69
+ pattern_parts.append(".*-") # Match any examination alias and content
68
70
 
69
71
  if is_anonymous is True:
70
72
  pattern_parts.append("anonymous$")
71
73
  elif is_anonymous is False:
72
74
  pattern_parts.append("non_anonymous$")
73
- else: # is_anonymous is None
74
- pattern_parts.append("(non_)?anonymous$") # Match either
75
+ else: # is_anonymous is None
76
+ pattern_parts.append("(non_)?anonymous$") # Match either
75
77
 
76
78
  pattern = "".join(pattern_parts)
77
79
  else:
78
80
  # Use the imported function if content is specified
79
- pattern = get_video_key_regex_by_examination_alias(examination_alias, content, is_anonymous)
81
+ pattern = get_video_key_regex_by_examination_alias(
82
+ examination_alias, content, is_anonymous
83
+ )
80
84
  logger.warning(f"Generated pattern (from imported function): {pattern}")
81
85
 
82
-
83
86
  # Only consider keys for which the file actually exists
84
87
  keys_to_check = list(TEST_VIDEOS.keys())
85
88
  matched_keys = [key for key in keys_to_check if re.match(pattern, key)]
86
89
 
87
90
  # Fallback logic remains as a safety net, but ideally shouldn't be needed now for this case
88
91
  if not matched_keys and is_anonymous is False:
89
- logger.warning(f"Pattern '{pattern}' yielded no results for is_anonymous=False. Falling back to suffix check '-non_anonymous'.")
92
+ logger.warning(
93
+ f"Pattern '{pattern}' yielded no results for is_anonymous=False. Falling back to suffix check '-non_anonymous'."
94
+ )
90
95
  matched_keys = [key for key in keys_to_check if key.endswith("-non_anonymous")]
91
96
  elif not matched_keys and is_anonymous is True:
92
- logger.warning(f"Pattern '{pattern}' yielded no results for is_anonymous=True. Falling back to suffix check '-anonymous'.")
93
- matched_keys = [key for key in keys_to_check if key.endswith("-anonymous")]
97
+ logger.warning(
98
+ f"Pattern '{pattern}' yielded no results for is_anonymous=True. Falling back to suffix check '-anonymous'."
99
+ )
100
+ matched_keys = [key for key in keys_to_check if key.endswith("-anonymous")]
94
101
 
95
102
  if not matched_keys:
96
- logger.error(f"No keys found matching pattern '{pattern}' or fallback logic for keys: {keys_to_check}")
97
-
103
+ logger.error(
104
+ f"No keys found matching pattern '{pattern}' or fallback logic for keys: {keys_to_check}"
105
+ )
98
106
 
99
107
  return matched_keys
100
108
 
101
109
 
102
110
  def get_random_video_path_by_examination_alias(
103
- examination_alias:Optional[str]=None, content:Optional[str]=None, is_anonymous:Optional[bool]=None
111
+ examination_alias: Optional[str] = None,
112
+ content: Optional[str] = None,
113
+ is_anonymous: Optional[bool] = None,
104
114
  ):
105
115
  """
106
116
  Returns the file path of a randomly selected video matching the specified examination alias, content type, and anonymity status.
107
-
117
+
108
118
  Raises:
109
119
  ValueError: If no matching video keys are found for the given criteria.
110
120
  """
@@ -115,5 +125,3 @@ def get_random_video_path_by_examination_alias(
115
125
  return video_path # Return the first match for simplicity
116
126
  else:
117
127
  raise ValueError("No matching video keys found for the given criteria.")
118
-
119
-
@@ -23,5 +23,5 @@ __all__ = [
23
23
  "VideoImportService",
24
24
  "ImportContext",
25
25
  "validate_directories",
26
- "default_sensitive_meta"
26
+ "default_sensitive_meta",
27
27
  ]
@@ -4,4 +4,4 @@ from .import_context import ImportContext
4
4
  __all__ = [
5
5
  "file_lock",
6
6
  "ImportContext",
7
- ]
7
+ ]
@@ -3,7 +3,7 @@
3
3
  import os
4
4
  import logging
5
5
  from datetime import date
6
- from typing import Optional, Union
6
+ from typing import Union
7
7
 
8
8
  from endoreg_db.models.media import RawPdfFile, VideoFile
9
9
  from endoreg_db.models.metadata.sensitive_meta import SensitiveMeta # adjust path
@@ -17,18 +17,17 @@ DEFAULT_CENTER_NAME = "endoreg_db_demo"
17
17
  DEFAULT_PATIENT_DOB = date(1970, 1, 1)
18
18
 
19
19
 
20
- def default_sensitive_meta(instance: Union[RawPdfFile, VideoFile]) -> SensitiveMeta | None:
20
+ def default_sensitive_meta(
21
+ instance: Union[RawPdfFile, VideoFile],
22
+ ) -> SensitiveMeta | None:
21
23
  """
22
24
  Ensure the given instance has a minimal SensitiveMeta attached.
23
25
 
24
26
  Called after text extraction + merging; only creates meta if none exists.
25
27
  """
26
28
  if instance is None:
27
- logger.warning(
28
- "No instance available for ensuring default patient data"
29
- )
29
+ logger.warning("No instance available for ensuring default patient data")
30
30
  return
31
-
32
31
 
33
32
  if instance.sensitive_meta:
34
33
  # Already has meta; nothing to do
@@ -44,7 +43,9 @@ def default_sensitive_meta(instance: Union[RawPdfFile, VideoFile]) -> SensitiveM
44
43
  assert center_name is not None
45
44
  instance.center.name = center_name
46
45
  except AssertionError as e:
47
- logger.debug(f"{e}Center name is not set! You can set it in .env under DEFAULT_CENTER_NAME using default from default_sensitive_meta")
46
+ logger.debug(
47
+ f"{e}Center name is not set! You can set it in .env under DEFAULT_CENTER_NAME using default from default_sensitive_meta"
48
+ )
48
49
  instance.center.name = DEFAULT_CENTER_NAME
49
50
  instance.center.get_by_name(DEFAULT_CENTER_NAME)
50
51
 
@@ -59,7 +60,9 @@ def default_sensitive_meta(instance: Union[RawPdfFile, VideoFile]) -> SensitiveM
59
60
  else DEFAULT_CENTER_NAME
60
61
  ),
61
62
  # optional: link file_path for debugging/tracing
62
- "file_path": str(instance.file_path) if getattr(instance, "file_path", None) else None,
63
+ "file_path": str(instance.file_path)
64
+ if getattr(instance, "file_path", None)
65
+ else None,
63
66
  }
64
67
 
65
68
  try:
@@ -78,4 +81,3 @@ def default_sensitive_meta(instance: Union[RawPdfFile, VideoFile]) -> SensitiveM
78
81
  e,
79
82
  )
80
83
  return None
81
-
@@ -1,17 +1,17 @@
1
1
  from typing import Union, Optional
2
- import os
3
2
 
4
3
  from endoreg_db.models import Center
5
4
  from endoreg_db.models.media import RawPdfFile, VideoFile
6
5
 
7
6
 
8
- def ensure_center(instance: Union[RawPdfFile, VideoFile], center: Optional[str]) -> Center:
7
+ def ensure_center(
8
+ instance: Union[RawPdfFile, VideoFile], center: Optional[str]
9
+ ) -> Center:
9
10
  if not isinstance(instance.center, Center):
10
11
  raise AssertionError
11
12
  if not isinstance(instance.center.name, str):
12
13
  raise AssertionError
13
14
  assert isinstance(instance.center.get_by_name(center), Center)
14
- if not instance.center.get_by_name(center).name ==instance.center.name:
15
+ if not instance.center.get_by_name(center).name == instance.center.name:
15
16
  raise AssertionError
16
17
  return instance.center
17
-
@@ -3,8 +3,6 @@ from pathlib import Path
3
3
  import os
4
4
  import time
5
5
  from logging import getLogger
6
- import errno
7
- import shutil
8
6
  from typing import Generator, Any
9
7
 
10
8
  logger = getLogger(__name__)
@@ -45,7 +43,9 @@ def file_lock(path: Path) -> Generator[None, Any, None]:
45
43
  )
46
44
  lock_path.unlink()
47
45
  except Exception as e:
48
- logger.warning("Failed to remove stale lock %s: %s", lock_path, e)
46
+ logger.warning(
47
+ "Failed to remove stale lock %s: %s", lock_path, e
48
+ )
49
49
  continue
50
50
 
51
51
  if time.time() >= deadline:
@@ -4,40 +4,39 @@ from typing import Optional, Dict, Any, Union
4
4
 
5
5
  from lx_anonymizer.sensitive_meta_interface import SensitiveMeta
6
6
  from endoreg_db.models.media import RawPdfFile, VideoFile
7
+ from endoreg_db.utils.file_operations import sha256_file
7
8
 
8
9
 
9
10
  @dataclass
10
11
  class ImportContext:
11
- """
12
- Tracking the import success and reasons of failure.
13
- """
14
-
15
- # core import parameters
16
12
  file_path: Path
17
13
  center_name: str
18
14
  processor_name: str = "olympus-cv-500"
19
15
  delete_source: bool = True
16
+
20
17
  retry: bool = False
21
18
  import_completed: bool = False
22
19
  error_reason: str = ""
23
20
 
24
- # paths
25
21
  original_path: Optional[Path] = None
26
22
  quarantine_path: Optional[Path] = None
27
23
  sensitive_path: Optional[Path] = None
28
24
  anonymized_path: Optional[Path] = None
29
25
 
30
- # associated objects
31
26
  current_report: Optional[RawPdfFile] = None
32
27
  current_video: Optional[VideoFile] = None
33
28
  current_meta: Optional[SensitiveMeta] = None
29
+
34
30
  instance: Optional[Union[RawPdfFile, VideoFile]] = None
35
-
36
31
  file_type: str = "undefined"
37
-
38
-
39
- # processing metadata
40
- file_hash: str = ""
32
+
33
+ # will be populated in __post_init__
34
+ file_hash: Optional[str] = field(init=False)
35
+
41
36
  original_text: Optional[str] = None
42
37
  anonymized_text: Optional[str] = None
43
38
  extracted_metadata: Dict[str, Any] = field(default_factory=dict)
39
+
40
+ def __post_init__(self):
41
+ """Compute raw file hash after dataclass is constructed."""
42
+ self.file_hash = sha256_file(self.file_path)
@@ -53,4 +53,5 @@ def validate_directories(dirs: Iterable[Path] = dirs) -> bool:
53
53
 
54
54
  return ok
55
55
 
56
+
56
57
  validate_directories(dirs)
@@ -2,67 +2,91 @@
2
2
  import logging
3
3
  from typing import Tuple
4
4
 
5
- from endoreg_db.import_files.context.import_context import ImportContext
6
- from endoreg_db.models.media import RawPdfFile
7
- from endoreg_db.models.media.processing_history.processing_history import ProcessingHistory
8
5
  from endoreg_db.import_files.context.ensure_center import ensure_center
6
+ from endoreg_db.import_files.context.import_context import ImportContext #
7
+ from endoreg_db.utils.file_operations import sha256_file
8
+ from endoreg_db.models.media import RawPdfFile
9
+ from endoreg_db.models.state.processing_history.processing_history import (
10
+ ProcessingHistory,
11
+ )
12
+ from endoreg_db.import_files.file_storage.state_management import finalize_failure
13
+
9
14
  logger = logging.getLogger(__name__)
10
15
 
11
16
 
12
17
  def create_or_retrieve_report_file(
13
18
  ctx: ImportContext,
14
- ) -> Tuple[RawPdfFile, bool]:
19
+ ) -> Tuple[RawPdfFile, bool, bool]:
15
20
  """
16
21
  Create a new or retrieve an existing RawPdfFile for the given context.
17
22
 
18
23
  Returns:
19
24
  pdf : RawPdfFile instance
20
- needs_processing: True if the pipeline should run for this file
21
- (no successful history yet for this object/file_type key)
25
+ processed : True if there is already a successful ProcessingHistory for this file
26
+ needs_processing: True if the pipeline should run for this file in this call
22
27
  """
23
28
  file_path = ctx.file_path
24
29
  center_name = ctx.center_name
25
30
  delete_source = ctx.delete_source
26
31
  file_type = ctx.file_type # logical key for history; can be None
27
32
 
28
- # 1) Determine the RawPdfFile instance to work with
33
+ # default assumptions
34
+ processed = False
35
+ needs_processing = True
36
+
37
+ if not isinstance(ctx.file_hash, str):
38
+ ctx.file_hash = sha256_file(ctx.file_path)
39
+
40
+ # Check if we already have a successful history entry for this object
41
+ has_success_history = ProcessingHistory.has_history_for_hash(
42
+ file_hash=ctx.file_hash,
43
+ success=True,
44
+ )
45
+ has_failure_history = ProcessingHistory.has_history_for_hash(
46
+ file_hash=ctx.file_hash,
47
+ success=False,
48
+ )
49
+ if has_success_history:
50
+ logger.info(
51
+ "RawPdfFile pk= already has successful processing history (file_hash=%s) - short-circuiting",
52
+ ctx.file_hash,
53
+ )
54
+ processed = True
55
+ needs_processing = False
56
+ if not isinstance(ctx.current_report, RawPdfFile):
57
+ ctx.current_report = RawPdfFile.get_report_by_hash(ctx.file_hash)
58
+ return ctx.current_report, processed, needs_processing
59
+ elif has_failure_history:
60
+ if not isinstance(ctx.current_report, RawPdfFile):
61
+ ctx.current_report = RawPdfFile.get_report_by_hash(ctx.file_hash)
62
+ finalize_failure(ctx)
63
+ processed = True
64
+ needs_processing = True
65
+
66
+ # Determine the RawPdfFile instance to work with
29
67
  if ctx.current_report is not None:
30
68
  pdf = ctx.current_report
31
69
  logger.info("Using existing RawPdfFile from context: pk=%s", pdf.pk)
32
70
  else:
33
- logger.info("Creating new RawPdfFile from %s for center %s", file_path, center_name)
34
-
71
+ logger.info(
72
+ "Creating new RawPdfFile from %s for center %s",
73
+ file_path,
74
+ center_name,
75
+ )
76
+
35
77
  pdf = RawPdfFile.create_from_file_initialized(
36
78
  file_path=file_path,
37
79
  center_name=center_name,
38
80
  delete_source=delete_source,
39
81
  )
40
-
41
- center = ensure_center(pdf, ctx.center_name)
42
-
43
- logger.info(f"Successfully set up report file from {center.name}")
44
-
45
82
 
83
+ center = ensure_center(pdf, ctx.center_name)
84
+ logger.info("Successfully set up report file from %s", center.name)
46
85
 
47
- # 3) Check if we already have a successful history entry for this object+file_type
48
- has_success_history = ProcessingHistory.has_history_for_object(
49
- obj=pdf,
50
- success=True,
51
- )
52
-
53
- if has_success_history:
54
- logger.info(
55
- "RawPdfFile %s already has successful processing history (file_type=%s) - short-circuiting",
56
- getattr(pdf, str(pdf.file_path)),
57
- file_type,
58
- )
59
- # No need to run the pipeline again
60
- return pdf, False
61
-
62
- # 4) No successful history yet → ensure there is a history entry marking it as "in progress"/failed
63
- ProcessingHistory.get_or_create_for_object(
86
+ # No successful history yet ensure there is a history entry marking it as "in progress"/failed
87
+ ProcessingHistory.get_or_create_for_hash(
64
88
  obj=pdf,
65
- # At this point we haven't finished anonymization; treat as not-success yet.
89
+ file_hash=ctx.file_hash,
66
90
  success=False,
67
91
  )
68
92
 
@@ -72,5 +96,4 @@ def create_or_retrieve_report_file(
72
96
  file_type,
73
97
  )
74
98
 
75
- # Signal to the caller that the anonymization pipeline should run
76
- return pdf, True
99
+ return pdf, processed, needs_processing
@@ -1,75 +1,104 @@
1
1
  # endoreg_db/import_files/storage/create_video_file.py
2
2
  import logging
3
3
  from typing import Tuple
4
-
5
- from django.db import IntegrityError
6
-
7
4
  from endoreg_db.import_files.context.import_context import ImportContext
8
5
  from endoreg_db.import_files.context.ensure_center import ensure_center
6
+ from endoreg_db.utils.file_operations import sha256_file
9
7
  from endoreg_db.models.media import VideoFile
10
- from endoreg_db.models.media.processing_history.processing_history import ProcessingHistory
11
- from endoreg_db.utils.hashs import get_video_hash
8
+ from endoreg_db.models.state.processing_history.processing_history import (
9
+ ProcessingHistory,
10
+ )
11
+ from endoreg_db.import_files.file_storage.state_management import finalize_failure
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
15
 
16
16
  def create_or_retrieve_video_file(
17
17
  ctx: ImportContext,
18
- ) -> Tuple[VideoFile, bool]:
18
+ ) -> Tuple[VideoFile, bool, bool]:
19
19
  """
20
20
  Create a new or retrieve an existing VideoFile for the given context.
21
21
 
22
22
  Returns:
23
- video : VideoFile instance
24
- retry : whether we are re-processing an existing file
23
+ video : VideoFile instance
24
+ processed : True if there is already a successful ProcessingHistory for this file
25
+ needs_processing: True if the pipeline should run for this file in this call
25
26
  """
26
27
  file_path = ctx.file_path
28
+
27
29
  center_name = ctx.center_name
28
30
  processor_name = ctx.processor_name
29
31
  delete_source = ctx.delete_source
30
- file_type = ctx.file_type
32
+ file_type = ctx.file_type # logical key for history; can be None
33
+
34
+ # default assumptions (same semantics as report)
35
+ processed = False
36
+ needs_processing = True
37
+
38
+ if not isinstance(ctx.file_hash, str):
39
+ ctx.file_hash = sha256_file(ctx.file_path)
40
+
41
+ # 2) Check if we already have a successful history entry for this object
42
+ has_success_history = ProcessingHistory.has_history_for_hash(
43
+ file_hash=ctx.file_hash,
44
+ success=True,
45
+ )
46
+ has_failure_history = ProcessingHistory.has_history_for_hash(
47
+ file_hash=ctx.file_hash,
48
+ success=False,
49
+ )
31
50
 
32
- # 1) Determine the VideoFile instance to work with
51
+ if has_success_history:
52
+ logger.info(
53
+ "VideoFile already has successful processing history (file_hash=%s) "
54
+ "- short-circuiting",
55
+ ctx.file_hash,
56
+ )
57
+ processed = True
58
+ needs_processing = False
59
+ if not isinstance(ctx.current_video, VideoFile):
60
+ ctx.current_video = VideoFile.get_video_by_content_hash(ctx.file_hash)
61
+ return ctx.current_video, processed, needs_processing
62
+ elif has_failure_history:
63
+ if not isinstance(ctx.current_video, VideoFile):
64
+ ctx.current_video = VideoFile.get_video_by_content_hash(ctx.file_hash)
65
+ finalize_failure(ctx)
66
+
67
+ processed = True
68
+ needs_processing = True
69
+
70
+ # Determine the VideoFile instance to work with
33
71
  if ctx.current_video is not None:
34
72
  video = ctx.current_video
35
- logger.info("Using existing VideoFIle from context: pk =%s", video.pk)
73
+ logger.info("Using existing VideoFile from context: pk=%s", video.pk)
36
74
  else:
37
- logger.info("Creating new VideoFIle from %s for center %s", file_path, center_name)
75
+ logger.info(
76
+ "Creating new VideoFile from %s for center %s",
77
+ file_path,
78
+ center_name,
79
+ )
38
80
  video = VideoFile.create_from_file_initialized(
39
81
  file_path=file_path,
40
82
  center_name=center_name,
41
83
  processor_name=processor_name,
42
84
  delete_source=delete_source,
85
+ video_hash=ctx.file_hash,
43
86
  )
87
+ needs_processing = True
88
+
44
89
  center = ensure_center(video, ctx.center_name)
45
-
46
- logger.info(f"Successfully set up report file from {center.name}")
47
- # 3) Check if we already have a successful history entry for this object+file_type
48
- has_success_history = ProcessingHistory.has_history_for_object(
49
- obj=video,
50
- success=True,
51
- )
90
+ logger.info("Successfully set up video file from %s", center.name)
52
91
 
53
- if has_success_history:
54
- logger.info(
55
- "VideoFile %s already has successful processing history. (file_type:%s) - short-circuiting",
56
- getattr(video, str(video.active_file_path)),
57
- file_type,
92
+ # No successful history yet → ensure there is a history entry marking it as "in progress"/failed
93
+ ProcessingHistory.get_or_create_for_hash(
94
+ file_hash=ctx.file_hash,
95
+ success=False,
58
96
  )
59
-
60
- # 4) No successful history yet → ensure there is a history entry marking it as "in progress"/failed
61
- ProcessingHistory.get_or_create_for_object(
62
- obj=video,
63
- success=False
64
- )
65
-
66
-
97
+
67
98
  logger.info(
68
99
  "Video instance ready for processing: pk=%s, file_type=%s (needs_processing=True)",
69
100
  video.pk,
70
101
  file_type,
71
102
  )
72
-
73
- # Signal to the caller that the anonymization pipeline should run
74
- return video, True
75
103
 
104
+ return video, processed, needs_processing
@@ -1,18 +1,21 @@
1
1
  # endoreg_db/import_files/storage/sensitive_meta_storage.py
2
- from typing import Union, Dict, Any
2
+ from typing import Union
3
3
 
4
4
  from endoreg_db.models.media import RawPdfFile, VideoFile
5
5
  from endoreg_db.models.metadata import SensitiveMeta
6
6
  from endoreg_db.import_files.processing.sensitive_meta_adapter import (
7
7
  normalize_lx_sensitive_meta,
8
8
  )
9
- from endoreg_db.import_files.context.default_sensitive_meta import default_sensitive_meta
9
+ from endoreg_db.import_files.context.default_sensitive_meta import (
10
+ default_sensitive_meta,
11
+ )
10
12
  from logging import getLogger
11
13
  from lx_anonymizer.sensitive_meta_interface import SensitiveMeta as LxSM
12
14
  #
13
15
 
14
16
  logger = getLogger(__name__)
15
17
 
18
+
16
19
  def sensitive_meta_storage(
17
20
  sensitive_meta: LxSM,
18
21
  instance: Union[RawPdfFile, VideoFile],