endoreg-db 0.8.8.9__py3-none-any.whl → 0.8.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (453)
  1. endoreg_db/admin.py +10 -5
  2. endoreg_db/apps.py +4 -7
  3. endoreg_db/authz/auth.py +1 -0
  4. endoreg_db/authz/backends.py +1 -1
  5. endoreg_db/authz/management/commands/list_routes.py +2 -0
  6. endoreg_db/authz/middleware.py +8 -7
  7. endoreg_db/authz/permissions.py +21 -10
  8. endoreg_db/authz/policy.py +14 -19
  9. endoreg_db/authz/views_auth.py +14 -10
  10. endoreg_db/codemods/rename_datetime_fields.py +8 -1
  11. endoreg_db/exceptions.py +5 -2
  12. endoreg_db/forms/__init__.py +0 -1
  13. endoreg_db/forms/examination_form.py +4 -3
  14. endoreg_db/forms/patient_finding_intervention_form.py +30 -8
  15. endoreg_db/forms/patient_form.py +9 -13
  16. endoreg_db/forms/questionnaires/__init__.py +1 -1
  17. endoreg_db/forms/settings/__init__.py +4 -1
  18. endoreg_db/forms/unit.py +2 -1
  19. endoreg_db/helpers/count_db.py +17 -14
  20. endoreg_db/helpers/default_objects.py +2 -1
  21. endoreg_db/helpers/download_segmentation_model.py +4 -3
  22. endoreg_db/helpers/interact.py +0 -5
  23. endoreg_db/helpers/test_video_helper.py +33 -25
  24. endoreg_db/import_files/__init__.py +1 -1
  25. endoreg_db/import_files/context/__init__.py +1 -1
  26. endoreg_db/import_files/context/default_sensitive_meta.py +11 -9
  27. endoreg_db/import_files/context/ensure_center.py +4 -4
  28. endoreg_db/import_files/context/file_lock.py +3 -3
  29. endoreg_db/import_files/context/import_context.py +11 -12
  30. endoreg_db/import_files/context/validate_directories.py +1 -0
  31. endoreg_db/import_files/file_storage/create_report_file.py +57 -34
  32. endoreg_db/import_files/file_storage/create_video_file.py +64 -35
  33. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +5 -2
  34. endoreg_db/import_files/file_storage/state_management.py +146 -83
  35. endoreg_db/import_files/file_storage/storage.py +5 -1
  36. endoreg_db/import_files/processing/report_processing/report_anonymization.py +24 -19
  37. endoreg_db/import_files/processing/sensitive_meta_adapter.py +3 -3
  38. endoreg_db/import_files/processing/video_processing/video_anonymization.py +18 -18
  39. endoreg_db/import_files/pseudonymization/k_anonymity.py +8 -9
  40. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +16 -5
  41. endoreg_db/import_files/report_import_service.py +36 -30
  42. endoreg_db/import_files/video_import_service.py +27 -23
  43. endoreg_db/logger_conf.py +56 -40
  44. endoreg_db/management/__init__.py +1 -1
  45. endoreg_db/management/commands/__init__.py +1 -1
  46. endoreg_db/management/commands/check_auth.py +45 -38
  47. endoreg_db/management/commands/create_model_meta_from_huggingface.py +53 -2
  48. endoreg_db/management/commands/create_multilabel_model_meta.py +54 -19
  49. endoreg_db/management/commands/fix_missing_patient_data.py +105 -71
  50. endoreg_db/management/commands/fix_video_paths.py +75 -54
  51. endoreg_db/management/commands/import_report.py +1 -3
  52. endoreg_db/management/commands/list_routes.py +2 -0
  53. endoreg_db/management/commands/load_ai_model_data.py +8 -2
  54. endoreg_db/management/commands/load_ai_model_label_data.py +0 -1
  55. endoreg_db/management/commands/load_center_data.py +3 -3
  56. endoreg_db/management/commands/load_distribution_data.py +35 -38
  57. endoreg_db/management/commands/load_endoscope_data.py +0 -3
  58. endoreg_db/management/commands/load_examination_data.py +20 -4
  59. endoreg_db/management/commands/load_finding_data.py +18 -3
  60. endoreg_db/management/commands/load_gender_data.py +17 -24
  61. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +95 -85
  62. endoreg_db/management/commands/load_information_source.py +0 -3
  63. endoreg_db/management/commands/load_lab_value_data.py +14 -3
  64. endoreg_db/management/commands/load_legacy_data.py +303 -0
  65. endoreg_db/management/commands/load_name_data.py +1 -2
  66. endoreg_db/management/commands/load_pdf_type_data.py +4 -8
  67. endoreg_db/management/commands/load_profession_data.py +0 -1
  68. endoreg_db/management/commands/load_report_reader_flag_data.py +0 -4
  69. endoreg_db/management/commands/load_requirement_data.py +6 -2
  70. endoreg_db/management/commands/load_unit_data.py +0 -4
  71. endoreg_db/management/commands/load_user_groups.py +5 -7
  72. endoreg_db/management/commands/model_input.py +169 -0
  73. endoreg_db/management/commands/register_ai_model.py +22 -16
  74. endoreg_db/management/commands/setup_endoreg_db.py +110 -32
  75. endoreg_db/management/commands/storage_management.py +14 -8
  76. endoreg_db/management/commands/summarize_db_content.py +154 -63
  77. endoreg_db/management/commands/train_image_multilabel_model.py +144 -0
  78. endoreg_db/management/commands/validate_video_files.py +82 -50
  79. endoreg_db/management/commands/video_validation.py +4 -6
  80. endoreg_db/migrations/0001_initial.py +112 -63
  81. endoreg_db/migrations/__init__.py +0 -0
  82. endoreg_db/models/__init__.py +8 -0
  83. endoreg_db/models/administration/ai/active_model.py +5 -5
  84. endoreg_db/models/administration/ai/ai_model.py +41 -18
  85. endoreg_db/models/administration/ai/model_type.py +1 -0
  86. endoreg_db/models/administration/case/case.py +22 -22
  87. endoreg_db/models/administration/center/__init__.py +5 -5
  88. endoreg_db/models/administration/center/center.py +6 -2
  89. endoreg_db/models/administration/center/center_resource.py +18 -4
  90. endoreg_db/models/administration/center/center_shift.py +3 -1
  91. endoreg_db/models/administration/center/center_waste.py +6 -2
  92. endoreg_db/models/administration/person/__init__.py +1 -1
  93. endoreg_db/models/administration/person/employee/__init__.py +1 -1
  94. endoreg_db/models/administration/person/employee/employee_type.py +3 -1
  95. endoreg_db/models/administration/person/examiner/__init__.py +1 -1
  96. endoreg_db/models/administration/person/examiner/examiner.py +10 -2
  97. endoreg_db/models/administration/person/names/first_name.py +6 -4
  98. endoreg_db/models/administration/person/names/last_name.py +4 -3
  99. endoreg_db/models/administration/person/patient/__init__.py +1 -1
  100. endoreg_db/models/administration/person/patient/patient.py +0 -1
  101. endoreg_db/models/administration/person/patient/patient_external_id.py +0 -1
  102. endoreg_db/models/administration/person/person.py +1 -1
  103. endoreg_db/models/administration/product/__init__.py +7 -6
  104. endoreg_db/models/administration/product/product.py +6 -2
  105. endoreg_db/models/administration/product/product_group.py +9 -7
  106. endoreg_db/models/administration/product/product_material.py +9 -2
  107. endoreg_db/models/administration/product/reference_product.py +64 -15
  108. endoreg_db/models/administration/qualification/qualification.py +3 -1
  109. endoreg_db/models/administration/shift/shift.py +3 -1
  110. endoreg_db/models/administration/shift/shift_type.py +12 -4
  111. endoreg_db/models/aidataset/__init__.py +5 -0
  112. endoreg_db/models/aidataset/aidataset.py +193 -0
  113. endoreg_db/models/label/__init__.py +1 -1
  114. endoreg_db/models/label/label.py +10 -2
  115. endoreg_db/models/label/label_set.py +3 -1
  116. endoreg_db/models/label/label_video_segment/_create_from_video.py +6 -2
  117. endoreg_db/models/label/label_video_segment/label_video_segment.py +148 -44
  118. endoreg_db/models/media/__init__.py +12 -5
  119. endoreg_db/models/media/frame/__init__.py +1 -1
  120. endoreg_db/models/media/frame/frame.py +34 -8
  121. endoreg_db/models/media/pdf/__init__.py +2 -1
  122. endoreg_db/models/media/pdf/raw_pdf.py +11 -4
  123. endoreg_db/models/media/pdf/report_file.py +6 -2
  124. endoreg_db/models/media/pdf/report_reader/__init__.py +3 -3
  125. endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +15 -5
  126. endoreg_db/models/media/video/create_from_file.py +20 -41
  127. endoreg_db/models/media/video/pipe_1.py +75 -30
  128. endoreg_db/models/media/video/pipe_2.py +37 -12
  129. endoreg_db/models/media/video/video_file.py +36 -24
  130. endoreg_db/models/media/video/video_file_ai.py +235 -70
  131. endoreg_db/models/media/video/video_file_anonymize.py +240 -65
  132. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -1
  133. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +3 -1
  134. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +30 -9
  135. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +95 -29
  136. endoreg_db/models/media/video/video_file_frames/_get_frame.py +13 -3
  137. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -1
  138. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +15 -3
  139. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +15 -3
  140. endoreg_db/models/media/video/video_file_frames/_get_frames.py +7 -2
  141. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +109 -23
  142. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +111 -27
  143. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +46 -13
  144. endoreg_db/models/media/video/video_file_io.py +85 -33
  145. endoreg_db/models/media/video/video_file_meta/__init__.py +6 -6
  146. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +17 -4
  147. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +28 -7
  148. endoreg_db/models/media/video/video_file_meta/get_fps.py +46 -13
  149. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +81 -20
  150. endoreg_db/models/media/video/video_file_meta/text_meta.py +61 -20
  151. endoreg_db/models/media/video/video_file_meta/video_meta.py +40 -12
  152. endoreg_db/models/media/video/video_file_segments.py +118 -27
  153. endoreg_db/models/media/video/video_metadata.py +25 -6
  154. endoreg_db/models/media/video/video_processing.py +54 -15
  155. endoreg_db/models/medical/__init__.py +3 -13
  156. endoreg_db/models/medical/contraindication/__init__.py +3 -1
  157. endoreg_db/models/medical/disease.py +18 -6
  158. endoreg_db/models/medical/event.py +6 -2
  159. endoreg_db/models/medical/examination/__init__.py +5 -1
  160. endoreg_db/models/medical/examination/examination.py +22 -6
  161. endoreg_db/models/medical/examination/examination_indication.py +23 -7
  162. endoreg_db/models/medical/examination/examination_time.py +6 -2
  163. endoreg_db/models/medical/finding/__init__.py +3 -1
  164. endoreg_db/models/medical/finding/finding.py +37 -12
  165. endoreg_db/models/medical/finding/finding_classification.py +27 -8
  166. endoreg_db/models/medical/finding/finding_intervention.py +19 -6
  167. endoreg_db/models/medical/finding/finding_type.py +3 -1
  168. endoreg_db/models/medical/hardware/__init__.py +1 -1
  169. endoreg_db/models/medical/hardware/endoscope.py +14 -2
  170. endoreg_db/models/medical/laboratory/__init__.py +1 -1
  171. endoreg_db/models/medical/laboratory/lab_value.py +139 -39
  172. endoreg_db/models/medical/medication/__init__.py +7 -3
  173. endoreg_db/models/medical/medication/medication.py +3 -1
  174. endoreg_db/models/medical/medication/medication_indication.py +3 -1
  175. endoreg_db/models/medical/medication/medication_indication_type.py +11 -3
  176. endoreg_db/models/medical/medication/medication_intake_time.py +3 -1
  177. endoreg_db/models/medical/medication/medication_schedule.py +3 -1
  178. endoreg_db/models/medical/patient/__init__.py +2 -10
  179. endoreg_db/models/medical/patient/medication_examples.py +3 -14
  180. endoreg_db/models/medical/patient/patient_disease.py +17 -5
  181. endoreg_db/models/medical/patient/patient_event.py +12 -4
  182. endoreg_db/models/medical/patient/patient_examination.py +52 -15
  183. endoreg_db/models/medical/patient/patient_examination_indication.py +15 -4
  184. endoreg_db/models/medical/patient/patient_finding.py +105 -29
  185. endoreg_db/models/medical/patient/patient_finding_classification.py +41 -12
  186. endoreg_db/models/medical/patient/patient_finding_intervention.py +11 -3
  187. endoreg_db/models/medical/patient/patient_lab_sample.py +6 -2
  188. endoreg_db/models/medical/patient/patient_lab_value.py +42 -10
  189. endoreg_db/models/medical/patient/patient_medication.py +25 -7
  190. endoreg_db/models/medical/patient/patient_medication_schedule.py +34 -10
  191. endoreg_db/models/metadata/model_meta.py +40 -12
  192. endoreg_db/models/metadata/model_meta_logic.py +51 -16
  193. endoreg_db/models/metadata/sensitive_meta.py +65 -28
  194. endoreg_db/models/metadata/sensitive_meta_logic.py +28 -26
  195. endoreg_db/models/metadata/video_meta.py +146 -39
  196. endoreg_db/models/metadata/video_prediction_logic.py +70 -21
  197. endoreg_db/models/metadata/video_prediction_meta.py +80 -27
  198. endoreg_db/models/operation_log.py +63 -0
  199. endoreg_db/models/other/__init__.py +10 -10
  200. endoreg_db/models/other/distribution/__init__.py +9 -7
  201. endoreg_db/models/other/distribution/base_value_distribution.py +3 -1
  202. endoreg_db/models/other/distribution/date_value_distribution.py +19 -5
  203. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +3 -1
  204. endoreg_db/models/other/distribution/numeric_value_distribution.py +34 -9
  205. endoreg_db/models/other/emission/__init__.py +1 -1
  206. endoreg_db/models/other/emission/emission_factor.py +9 -3
  207. endoreg_db/models/other/information_source.py +15 -5
  208. endoreg_db/models/other/material.py +3 -1
  209. endoreg_db/models/other/transport_route.py +3 -1
  210. endoreg_db/models/other/unit.py +6 -2
  211. endoreg_db/models/report/report.py +0 -1
  212. endoreg_db/models/requirement/requirement.py +84 -27
  213. endoreg_db/models/requirement/requirement_error.py +5 -6
  214. endoreg_db/models/requirement/requirement_evaluation/__init__.py +1 -1
  215. endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +8 -8
  216. endoreg_db/models/requirement/requirement_evaluation/get_values.py +3 -3
  217. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +24 -8
  218. endoreg_db/models/requirement/requirement_operator.py +28 -8
  219. endoreg_db/models/requirement/requirement_set.py +34 -11
  220. endoreg_db/models/state/__init__.py +1 -0
  221. endoreg_db/models/state/audit_ledger.py +9 -2
  222. endoreg_db/models/{media → state}/processing_history/__init__.py +1 -3
  223. endoreg_db/models/state/processing_history/processing_history.py +136 -0
  224. endoreg_db/models/state/raw_pdf.py +0 -1
  225. endoreg_db/models/state/video.py +2 -3
  226. endoreg_db/models/utils.py +4 -2
  227. endoreg_db/queries/__init__.py +2 -6
  228. endoreg_db/queries/annotations/__init__.py +1 -3
  229. endoreg_db/queries/annotations/legacy.py +37 -26
  230. endoreg_db/root_urls.py +3 -4
  231. endoreg_db/schemas/examination_evaluation.py +3 -0
  232. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +249 -163
  233. endoreg_db/serializers/__init__.py +2 -8
  234. endoreg_db/serializers/administration/__init__.py +1 -2
  235. endoreg_db/serializers/administration/ai/__init__.py +0 -1
  236. endoreg_db/serializers/administration/ai/active_model.py +3 -1
  237. endoreg_db/serializers/administration/ai/ai_model.py +5 -3
  238. endoreg_db/serializers/administration/ai/model_type.py +3 -1
  239. endoreg_db/serializers/administration/center.py +7 -2
  240. endoreg_db/serializers/administration/gender.py +4 -2
  241. endoreg_db/serializers/anonymization.py +13 -13
  242. endoreg_db/serializers/evaluation/examination_evaluation.py +0 -1
  243. endoreg_db/serializers/examination/__init__.py +1 -1
  244. endoreg_db/serializers/examination/base.py +12 -13
  245. endoreg_db/serializers/examination/dropdown.py +6 -7
  246. endoreg_db/serializers/examination_serializer.py +3 -6
  247. endoreg_db/serializers/finding/__init__.py +1 -1
  248. endoreg_db/serializers/finding/finding.py +14 -7
  249. endoreg_db/serializers/finding_classification/__init__.py +3 -3
  250. endoreg_db/serializers/finding_classification/choice.py +3 -3
  251. endoreg_db/serializers/finding_classification/classification.py +2 -4
  252. endoreg_db/serializers/label_video_segment/__init__.py +5 -3
  253. endoreg_db/serializers/{label → label_video_segment}/image_classification_annotation.py +5 -5
  254. endoreg_db/serializers/label_video_segment/label/__init__.py +6 -0
  255. endoreg_db/serializers/{label → label_video_segment/label}/label.py +1 -1
  256. endoreg_db/serializers/label_video_segment/label_video_segment.py +338 -228
  257. endoreg_db/serializers/meta/__init__.py +1 -2
  258. endoreg_db/serializers/meta/sensitive_meta_detail.py +28 -13
  259. endoreg_db/serializers/meta/sensitive_meta_update.py +51 -46
  260. endoreg_db/serializers/meta/sensitive_meta_verification.py +19 -16
  261. endoreg_db/serializers/misc/__init__.py +2 -2
  262. endoreg_db/serializers/misc/file_overview.py +11 -7
  263. endoreg_db/serializers/misc/stats.py +10 -8
  264. endoreg_db/serializers/misc/translatable_field_mix_in.py +6 -6
  265. endoreg_db/serializers/misc/upload_job.py +32 -29
  266. endoreg_db/serializers/patient/__init__.py +2 -1
  267. endoreg_db/serializers/patient/patient.py +32 -15
  268. endoreg_db/serializers/patient/patient_dropdown.py +11 -3
  269. endoreg_db/serializers/patient_examination/__init__.py +1 -1
  270. endoreg_db/serializers/patient_examination/patient_examination.py +67 -40
  271. endoreg_db/serializers/patient_finding/__init__.py +1 -1
  272. endoreg_db/serializers/patient_finding/patient_finding.py +2 -1
  273. endoreg_db/serializers/patient_finding/patient_finding_classification.py +17 -9
  274. endoreg_db/serializers/patient_finding/patient_finding_detail.py +26 -17
  275. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +7 -5
  276. endoreg_db/serializers/patient_finding/patient_finding_list.py +10 -11
  277. endoreg_db/serializers/patient_finding/patient_finding_write.py +36 -27
  278. endoreg_db/serializers/pdf/__init__.py +1 -3
  279. endoreg_db/serializers/requirements/requirement_schema.py +1 -6
  280. endoreg_db/serializers/sensitive_meta_serializer.py +100 -81
  281. endoreg_db/serializers/video/__init__.py +2 -2
  282. endoreg_db/serializers/video/{segmentation.py → video_file.py} +66 -47
  283. endoreg_db/serializers/video/video_file_brief.py +6 -2
  284. endoreg_db/serializers/video/video_file_detail.py +36 -23
  285. endoreg_db/serializers/video/video_file_list.py +4 -2
  286. endoreg_db/serializers/video/video_processing_history.py +54 -50
  287. endoreg_db/services/__init__.py +1 -1
  288. endoreg_db/services/anonymization.py +2 -2
  289. endoreg_db/services/examination_evaluation.py +40 -17
  290. endoreg_db/services/model_meta_from_hf.py +76 -0
  291. endoreg_db/services/polling_coordinator.py +101 -70
  292. endoreg_db/services/pseudonym_service.py +27 -22
  293. endoreg_db/services/report_import.py +6 -3
  294. endoreg_db/services/segment_sync.py +75 -59
  295. endoreg_db/services/video_import.py +6 -7
  296. endoreg_db/urls/__init__.py +2 -2
  297. endoreg_db/urls/ai.py +7 -25
  298. endoreg_db/urls/anonymization.py +61 -15
  299. endoreg_db/urls/auth.py +4 -4
  300. endoreg_db/urls/classification.py +4 -9
  301. endoreg_db/urls/examination.py +27 -18
  302. endoreg_db/urls/media.py +27 -34
  303. endoreg_db/urls/patient.py +11 -7
  304. endoreg_db/urls/requirements.py +3 -1
  305. endoreg_db/urls/root_urls.py +2 -3
  306. endoreg_db/urls/stats.py +24 -16
  307. endoreg_db/urls/upload.py +3 -11
  308. endoreg_db/utils/__init__.py +14 -15
  309. endoreg_db/utils/ai/__init__.py +1 -1
  310. endoreg_db/utils/ai/data_loader_for_model_input.py +262 -0
  311. endoreg_db/utils/ai/data_loader_for_model_training.py +262 -0
  312. endoreg_db/utils/ai/get.py +2 -1
  313. endoreg_db/utils/ai/inference_dataset.py +14 -15
  314. endoreg_db/utils/ai/model_training/config.py +117 -0
  315. endoreg_db/utils/ai/model_training/dataset.py +74 -0
  316. endoreg_db/utils/ai/model_training/losses.py +68 -0
  317. endoreg_db/utils/ai/model_training/metrics.py +78 -0
  318. endoreg_db/utils/ai/model_training/model_backbones.py +155 -0
  319. endoreg_db/utils/ai/model_training/model_gastronet_resnet.py +118 -0
  320. endoreg_db/utils/ai/model_training/trainer_gastronet_multilabel.py +771 -0
  321. endoreg_db/utils/ai/multilabel_classification_net.py +21 -6
  322. endoreg_db/utils/ai/predict.py +4 -4
  323. endoreg_db/utils/ai/preprocess.py +19 -11
  324. endoreg_db/utils/calc_duration_seconds.py +4 -4
  325. endoreg_db/utils/case_generator/lab_sample_factory.py +3 -4
  326. endoreg_db/utils/check_video_files.py +74 -47
  327. endoreg_db/utils/cropping.py +10 -9
  328. endoreg_db/utils/dataloader.py +11 -3
  329. endoreg_db/utils/dates.py +3 -4
  330. endoreg_db/utils/defaults/set_default_center.py +7 -6
  331. endoreg_db/utils/env.py +6 -2
  332. endoreg_db/utils/extract_specific_frames.py +24 -9
  333. endoreg_db/utils/file_operations.py +30 -18
  334. endoreg_db/utils/fix_video_path_direct.py +57 -41
  335. endoreg_db/utils/frame_anonymization_utils.py +157 -157
  336. endoreg_db/utils/hashs.py +3 -18
  337. endoreg_db/utils/links/requirement_link.py +96 -52
  338. endoreg_db/utils/ocr.py +30 -25
  339. endoreg_db/utils/operation_log.py +61 -0
  340. endoreg_db/utils/parse_and_generate_yaml.py +12 -13
  341. endoreg_db/utils/paths.py +6 -6
  342. endoreg_db/utils/permissions.py +40 -24
  343. endoreg_db/utils/pipelines/process_video_dir.py +50 -26
  344. endoreg_db/utils/product/sum_emissions.py +5 -3
  345. endoreg_db/utils/product/sum_weights.py +4 -2
  346. endoreg_db/utils/pydantic_models/__init__.py +3 -4
  347. endoreg_db/utils/requirement_operator_logic/_old/lab_value_operators.py +207 -107
  348. endoreg_db/utils/requirement_operator_logic/_old/model_evaluators.py +252 -65
  349. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +27 -10
  350. endoreg_db/utils/setup_config.py +21 -5
  351. endoreg_db/utils/storage.py +3 -1
  352. endoreg_db/utils/translation.py +19 -15
  353. endoreg_db/utils/uuid.py +1 -0
  354. endoreg_db/utils/validate_endo_roi.py +12 -4
  355. endoreg_db/utils/validate_subcategory_dict.py +26 -24
  356. endoreg_db/utils/validate_video_detailed.py +207 -149
  357. endoreg_db/utils/video/__init__.py +7 -3
  358. endoreg_db/utils/video/extract_frames.py +30 -18
  359. endoreg_db/utils/video/ffmpeg_wrapper.py +217 -52
  360. endoreg_db/utils/video/names.py +11 -6
  361. endoreg_db/utils/video/streaming_processor.py +175 -101
  362. endoreg_db/utils/video/video_splitter.py +30 -19
  363. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +59 -50
  364. endoreg_db/views/__init__.py +0 -20
  365. endoreg_db/views/anonymization/__init__.py +6 -2
  366. endoreg_db/views/anonymization/media_management.py +2 -6
  367. endoreg_db/views/anonymization/overview.py +34 -1
  368. endoreg_db/views/anonymization/validate.py +79 -18
  369. endoreg_db/views/auth/__init__.py +1 -1
  370. endoreg_db/views/auth/keycloak.py +16 -14
  371. endoreg_db/views/examination/__init__.py +12 -15
  372. endoreg_db/views/examination/examination.py +5 -5
  373. endoreg_db/views/examination/examination_manifest_cache.py +5 -5
  374. endoreg_db/views/examination/get_finding_classification_choices.py +8 -5
  375. endoreg_db/views/examination/get_finding_classifications.py +9 -7
  376. endoreg_db/views/examination/get_findings.py +8 -10
  377. endoreg_db/views/examination/get_instruments.py +3 -2
  378. endoreg_db/views/examination/get_interventions.py +1 -1
  379. endoreg_db/views/finding/__init__.py +2 -2
  380. endoreg_db/views/finding/finding.py +58 -54
  381. endoreg_db/views/finding/get_classifications.py +1 -1
  382. endoreg_db/views/finding/get_interventions.py +1 -1
  383. endoreg_db/views/finding_classification/__init__.py +5 -5
  384. endoreg_db/views/finding_classification/finding_classification.py +5 -6
  385. endoreg_db/views/finding_classification/get_classification_choices.py +3 -4
  386. endoreg_db/views/media/__init__.py +13 -13
  387. endoreg_db/views/media/pdf_media.py +9 -9
  388. endoreg_db/views/media/sensitive_metadata.py +10 -7
  389. endoreg_db/views/media/video_media.py +4 -4
  390. endoreg_db/views/meta/__init__.py +1 -1
  391. endoreg_db/views/meta/sensitive_meta_list.py +20 -22
  392. endoreg_db/views/meta/sensitive_meta_verification.py +14 -11
  393. endoreg_db/views/misc/__init__.py +6 -34
  394. endoreg_db/views/misc/center.py +2 -1
  395. endoreg_db/views/misc/csrf.py +2 -1
  396. endoreg_db/views/misc/gender.py +2 -1
  397. endoreg_db/views/misc/stats.py +141 -106
  398. endoreg_db/views/patient/__init__.py +1 -3
  399. endoreg_db/views/patient/patient.py +141 -99
  400. endoreg_db/views/patient_examination/__init__.py +5 -5
  401. endoreg_db/views/patient_examination/patient_examination.py +43 -42
  402. endoreg_db/views/patient_examination/patient_examination_create.py +10 -15
  403. endoreg_db/views/patient_examination/patient_examination_detail.py +12 -15
  404. endoreg_db/views/patient_examination/patient_examination_list.py +21 -17
  405. endoreg_db/views/patient_examination/video.py +114 -80
  406. endoreg_db/views/patient_finding/__init__.py +1 -1
  407. endoreg_db/views/patient_finding/patient_finding.py +17 -10
  408. endoreg_db/views/patient_finding/patient_finding_optimized.py +127 -95
  409. endoreg_db/views/patient_finding_classification/__init__.py +1 -1
  410. endoreg_db/views/patient_finding_classification/pfc_create.py +35 -27
  411. endoreg_db/views/report/reimport.py +1 -1
  412. endoreg_db/views/report/report_stream.py +5 -8
  413. endoreg_db/views/requirement/__init__.py +2 -1
  414. endoreg_db/views/requirement/evaluate.py +7 -9
  415. endoreg_db/views/requirement/lookup.py +2 -3
  416. endoreg_db/views/requirement/lookup_store.py +0 -1
  417. endoreg_db/views/requirement/requirement_utils.py +2 -4
  418. endoreg_db/views/stats/__init__.py +4 -4
  419. endoreg_db/views/stats/stats_views.py +152 -115
  420. endoreg_db/views/video/__init__.py +18 -27
  421. endoreg_db/views/{ai → video/ai}/__init__.py +2 -2
  422. endoreg_db/views/{ai → video/ai}/label.py +20 -16
  423. endoreg_db/views/video/correction.py +5 -6
  424. endoreg_db/views/video/reimport.py +134 -99
  425. endoreg_db/views/video/segments_crud.py +134 -44
  426. endoreg_db/views/video/video_apply_mask.py +13 -12
  427. endoreg_db/views/video/video_correction.py +2 -1
  428. endoreg_db/views/video/video_download_processed.py +15 -15
  429. endoreg_db/views/video/video_meta_stats.py +7 -6
  430. endoreg_db/views/video/video_processing_history.py +3 -2
  431. endoreg_db/views/video/video_remove_frames.py +13 -12
  432. endoreg_db/views/video/video_stream.py +110 -82
  433. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/METADATA +9 -3
  434. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/RECORD +436 -433
  435. endoreg_db/import_files/processing/video_processing/video_cleanup_on_error.py +0 -119
  436. endoreg_db/management/commands/import_fallback_video.py +0 -203
  437. endoreg_db/management/commands/import_video.py +0 -422
  438. endoreg_db/management/commands/import_video_with_classification.py +0 -367
  439. endoreg_db/models/media/processing_history/processing_history.py +0 -96
  440. endoreg_db/serializers/label/__init__.py +0 -7
  441. endoreg_db/serializers/label_video_segment/_lvs_create.py +0 -149
  442. endoreg_db/serializers/label_video_segment/_lvs_update.py +0 -138
  443. endoreg_db/serializers/label_video_segment/_lvs_validate.py +0 -149
  444. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +0 -99
  445. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +0 -163
  446. endoreg_db/services/__old/pdf_import.py +0 -1487
  447. endoreg_db/services/__old/video_import.py +0 -1306
  448. endoreg_db/tasks/upload_tasks.py +0 -216
  449. endoreg_db/tasks/video_ingest.py +0 -161
  450. endoreg_db/tasks/video_processing_tasks.py +0 -327
  451. endoreg_db/views/misc/translation.py +0 -182
  452. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/WHEEL +0 -0
  453. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,1306 +0,0 @@
1
- """
2
- Video import service module.
3
-
4
- Provides high-level functions for importing and anonymizing video files,
5
- combining VideoFile creation with frame-level anonymization.
6
-
7
- Changelog:
8
- October 14, 2025: Added file locking mechanism to prevent race conditions
9
- during concurrent video imports (matches report import pattern)
10
- """
11
-
12
- import logging
13
- import os
14
- import shutil
15
- import subprocess
16
- import time
17
- from contextlib import contextmanager
18
- from datetime import date
19
- from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Tuple, Union
21
-
22
- from django.db import transaction
23
- from django.db.models.fields.files import FieldFile
24
-
25
- from endoreg_db.models import EndoscopyProcessor, SensitiveMeta, VideoFile
26
- from endoreg_db.models.media.video.video_file_anonymize import _cleanup_raw_assets
27
- from endoreg_db.models.state import VideoState
28
- from endoreg_db.utils import ensure_local_file, storage_file_exists
29
- from endoreg_db.utils.hashs import get_video_hash
30
- from endoreg_db.utils.paths import ANONYM_VIDEO_DIR, STORAGE_DIR
31
-
32
- # File lock configuration (matches report import)
33
- STALE_LOCK_SECONDS = 6000 # 100 minutes - reclaim locks older than this
34
- MAX_LOCK_WAIT_SECONDS = (
35
- 90 # New: wait up to 90s for a non-stale lock to clear before skipping
36
- )
37
-
38
- logger = logging.getLogger(__name__)
39
-
40
-
41
- class VideoImportService:
42
- """
43
- Service for importing and anonymizing video files.
44
- Uses a central video instance pattern for cleaner state management.
45
-
46
- Features (October 14, 2025):
47
- - File locking to prevent concurrent processing of the same video
48
- - Stale lock detection and reclamation (600s timeout)
49
- - Hash-based duplicate detection
50
- - Graceful fallback processing without lx_anonymizer
51
- """
52
-
53
- def __init__(self, project_root: Optional[Path] = None):
54
- # Set up project root path
55
- if project_root:
56
- self.project_root = Path(project_root)
57
- else:
58
- self.project_root = Path(__file__).parent.parent.parent.parent
59
-
60
- # Track processed files to prevent duplicates
61
- try:
62
- # Ensure anonym_video directory exists before listing files
63
- anonym_video_dir = Path(ANONYM_VIDEO_DIR)
64
- if anonym_video_dir.exists():
65
- self.processed_files = set(
66
- str(anonym_video_dir / file)
67
- for file in os.listdir(ANONYM_VIDEO_DIR)
68
- )
69
- else:
70
- logger.info(f"Creating anonym_videos directory: {anonym_video_dir}")
71
- anonym_video_dir.mkdir(parents=True, exist_ok=True)
72
- self.processed_files = set()
73
- except Exception as e:
74
- logger.warning(f"Failed to initialize processed files tracking: {e}")
75
- self.processed_files = set()
76
-
77
- # Central video instance and processing context
78
- self.current_video: Optional[VideoFile] = None
79
- self.processing_context: Dict[str, Any] = {}
80
-
81
- self.delete_source = True
82
- self.original_file_path = None
83
-
84
- self.logger = logging.getLogger(__name__)
85
-
86
- self.current_video_id = Optional[int]
87
-
88
- self.cleaner = (
89
- None # This gets instantiated in the perform_frame_cleaning method
90
- )
91
-
92
- def _require_current_video(self) -> VideoFile:
93
- """Return the current VideoFile or raise if it has not been initialized."""
94
- if self.current_video is None:
95
- raise RuntimeError("Current video instance is not set")
96
- return self.current_video
97
-
98
- @contextmanager
99
- def _file_lock(self, path: Path):
100
- """
101
- Create a file lock to prevent duplicate processing of the same video.
102
-
103
- This context manager creates a .lock file alongside the video file.
104
- If the lock file already exists, it checks if it's stale (older than
105
- STALE_LOCK_SECONDS) and reclaims it if necessary. If it's not stale,
106
- we now WAIT (up to MAX_LOCK_WAIT_SECONDS) instead of failing immediately.
107
- """
108
- lock_path = Path(str(path) + ".lock")
109
- fd = None
110
- try:
111
- deadline = time.time() + MAX_LOCK_WAIT_SECONDS
112
- while True:
113
- try:
114
- # Atomic create; fail if exists
115
- fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
116
- break # acquired
117
- except FileExistsError:
118
- # Check for stale lock
119
- age = None
120
- try:
121
- st = os.stat(lock_path)
122
- age = time.time() - st.st_mtime
123
- except FileNotFoundError:
124
- # Race: lock removed between exists and stat; retry acquire in next loop
125
- age = None
126
-
127
- if age is not None and age > STALE_LOCK_SECONDS:
128
- try:
129
- logger.warning(
130
- "Stale lock detected for %s (age %.0fs). Reclaiming lock...",
131
- path,
132
- age,
133
- )
134
- lock_path.unlink()
135
- except Exception as e:
136
- logger.warning(
137
- "Failed to remove stale lock %s: %s", lock_path, e
138
- )
139
- # Loop continues and retries acquire immediately
140
- continue
141
-
142
- # Not stale: wait until deadline, then give up gracefully
143
- if time.time() >= deadline:
144
- raise ValueError(f"File already being processed: {path}")
145
- time.sleep(1.0)
146
-
147
- os.write(fd, b"lock")
148
- os.close(fd)
149
- fd = None
150
- yield
151
- finally:
152
- try:
153
- if fd is not None:
154
- os.close(fd)
155
- if lock_path.exists():
156
- lock_path.unlink()
157
- except OSError:
158
- pass
159
-
160
- def processed(self) -> bool:
161
- """Indicates if the current file has already been processed."""
162
- return getattr(self, "_processed", False)
163
-
164
- def import_and_anonymize(
165
- self,
166
- file_path: Union[Path, str],
167
- center_name: str,
168
- processor_name: str,
169
- save_video: bool = True,
170
- delete_source: bool = True,
171
- ) -> "VideoFile|None":
172
- """
173
- High-level helper that orchestrates the complete video import and anonymization process.
174
- Uses the central video instance pattern for improved state management.
175
- """
176
- # DEFENSIVE: Initialize processing_context immediately to prevent KeyError crashes
177
- self.processing_context = {"file_path": Path(file_path)}
178
-
179
- try:
180
- # Initialize processing context
181
- self._initialize_processing_context(
182
- file_path, center_name, processor_name, save_video, delete_source
183
- )
184
-
185
- # Validate and prepare file (may raise ValueError if another worker holds a non-stale lock)
186
- try:
187
- self._validate_and_prepare_file()
188
- except ValueError as ve:
189
- # Relaxed behavior: if another process is working on this file, skip cleanly
190
- if "already being processed" in str(ve):
191
- self.logger.info(f"Skipping {file_path}: {ve}")
192
- return None
193
- raise
194
-
195
- # Create or retrieve video instance
196
- self._create_or_retrieve_video_instance()
197
-
198
- # Create sensitive meta file, ensure raw is moved out of processing folder watched by file watcher.
199
- self._create_sensitive_file()
200
-
201
- # Setup processing environment
202
- self._setup_processing_environment()
203
-
204
- # Process frames and metadata
205
- self._process_frames_and_metadata()
206
-
207
- # Finalize processing
208
- self._finalize_processing()
209
-
210
- # Move files and cleanup
211
- self._cleanup_and_archive()
212
-
213
- return self.current_video
214
-
215
- except Exception as e:
216
- # Safe file path access - handles cases where processing_context wasn't initialized
217
- safe_file_path = getattr(self, "processing_context", {}).get(
218
- "file_path", file_path
219
- )
220
- # Debug: Log context state for troubleshooting
221
- context_keys = list(getattr(self, "processing_context", {}).keys())
222
- self.logger.debug(f"Context keys during error: {context_keys}")
223
- self.logger.error(
224
- f"Video import and anonymization failed for {safe_file_path}: {e}"
225
- )
226
- self._cleanup_on_error()
227
- raise
228
- finally:
229
- self._cleanup_processing_context()
230
-
231
- def _initialize_processing_context(
232
- self,
233
- file_path: Union[Path, str],
234
- center_name: str,
235
- processor_name: str,
236
- save_video: bool,
237
- delete_source: bool,
238
- ):
239
- """Initialize the processing context for the current video import."""
240
- self.processing_context = {
241
- "file_path": Path(file_path),
242
- "center_name": center_name,
243
- "processor_name": processor_name,
244
- "save_video": save_video,
245
- "delete_source": delete_source,
246
- "processing_started": False,
247
- "frames_extracted": False,
248
- "anonymization_completed": False,
249
- "error_reason": None,
250
- }
251
- self.original_file_path = str(file_path)
252
-
253
- self.logger.info(f"Initialized processing context for: {file_path}")
254
-
255
- def _validate_and_prepare_file(self):
256
- """
257
- Validate the video file and prepare for processing.
258
-
259
- Uses file locking to prevent concurrent processing of the same video file.
260
- This prevents race conditions where multiple workers might try to process
261
- the same video simultaneously.
262
-
263
- The lock is acquired here and held for the entire import process.
264
- See _file_lock() for lock reclamation logic.
265
- """
266
- file_path = self.processing_context["file_path"]
267
-
268
- # Acquire file lock to prevent concurrent processing
269
- # Lock will be held until finally block in import_and_anonymize()
270
- try:
271
- self.processing_context["_lock_context"] = self._file_lock(file_path)
272
- self.processing_context["_lock_context"].__enter__()
273
- except Exception:
274
- self._cleanup_processing_context()
275
- raise
276
-
277
- self.logger.info("Acquired file lock for: %s", file_path)
278
-
279
- # Check if already processed (memory-based check)
280
- if str(file_path) in self.processed_files:
281
- self.logger.info("File %s already processed, skipping", file_path)
282
- self._processed = True
283
- raise ValueError(f"File already processed: {file_path}")
284
-
285
- # Check file exists
286
- if not file_path.exists():
287
- raise FileNotFoundError(f"Video file not found: {file_path}")
288
-
289
- self.logger.info("File validation completed for: %s", file_path)
290
-
291
- def _create_or_retrieve_video_instance(self):
292
- """Create or retrieve the VideoFile instance and move to final storage."""
293
-
294
- self.logger.info("Creating VideoFile instance...")
295
-
296
- self.current_video = VideoFile.create_from_file_initialized(
297
- file_path=self.processing_context["file_path"],
298
- center_name=self.processing_context["center_name"],
299
- processor_name=self.processing_context["processor_name"],
300
- delete_source=self.processing_context["delete_source"],
301
- save_video_file=self.processing_context["save_video"],
302
- )
303
- self.current_video_id = self.current_video.pk
304
-
305
- if not self.current_video:
306
- raise RuntimeError("Failed to create VideoFile instance")
307
-
308
- # Immediately move to final storage locations
309
- self._move_to_final_storage()
310
-
311
- self.logger.info("Created VideoFile with UUID: %s", self.current_video.uuid)
312
-
313
- # Get and mark processing state
314
- state = VideoFile.get_or_create_state(self.current_video)
315
- if not state:
316
- raise RuntimeError("Failed to create VideoFile state")
317
-
318
- state.mark_processing_started(save=True)
319
- self.processing_context["processing_started"] = True
320
-
321
- def _move_to_final_storage(self):
322
- """
323
- Move video from raw_videos to final storage locations.
324
- - Raw video → /data/videos (raw_file_path)
325
- - Processed video will later → /data/anonym_videos (file_path)
326
- """
327
- from endoreg_db.utils import data_paths
328
-
329
- source_path = Path(self.processing_context["file_path"])
330
- _current_video = self._require_current_video()
331
- videos_dir = Path(data_paths["import_video"])
332
- storage_root = Path(data_paths["storage"])
333
-
334
- videos_dir.mkdir(parents=True, exist_ok=True)
335
-
336
- # --- Derive stored_raw_path safely ---
337
- stored_raw_path = None
338
- try:
339
- if hasattr(_current_video, "get_raw_file_path"):
340
- candidate = _current_video.get_raw_file_path()
341
- if candidate:
342
- candidate_path = Path(candidate)
343
- # Accept only if under storage_root
344
- try:
345
- candidate_path.relative_to(storage_root)
346
- stored_raw_path = candidate_path
347
- except ValueError:
348
- # outside storage_root, reset
349
- stored_raw_path = None
350
- except Exception:
351
- stored_raw_path = None
352
-
353
- # Fallback: derive from UUID + suffix - ALWAYS use UUID for consistency
354
- if not stored_raw_path:
355
- suffix = source_path.suffix or ".mp4"
356
- uuid_str = getattr(_current_video, "uuid", None)
357
- if uuid_str:
358
- filename = f"{uuid_str}{suffix}"
359
- else:
360
- # Emergency fallback with timestamp to avoid conflicts
361
- import time
362
-
363
- timestamp = int(time.time())
364
- filename = f"video_{timestamp}{suffix}"
365
- self.logger.warning(
366
- "No UUID available, using timestamp-based filename: %s", filename
367
- )
368
- stored_raw_path = videos_dir / filename
369
- self.logger.debug("Using UUID-based raw filename: %s", filename)
370
-
371
- delete_source = bool(self.processing_context.get("delete_source", True))
372
- stored_raw_path.parent.mkdir(parents=True, exist_ok=True)
373
-
374
- # --- Move or copy raw video ---
375
- try:
376
- if delete_source:
377
- # Try atomic move first, fallback to copy+unlink
378
- try:
379
- os.replace(source_path, stored_raw_path)
380
- self.logger.info("Moved raw video to: %s", stored_raw_path)
381
- except Exception:
382
- shutil.copy2(source_path, stored_raw_path)
383
- os.remove(source_path)
384
- self.logger.info(
385
- "Copied & removed raw video to: %s", stored_raw_path
386
- )
387
- else:
388
- shutil.copy2(source_path, stored_raw_path)
389
- self.logger.info("Copied raw video to: %s", stored_raw_path)
390
- except Exception as e:
391
- self.logger.error("Failed to move/copy video to final storage: %s", e)
392
- raise
393
-
394
- # --- Ensure DB raw_file is relative to storage root ---
395
- try:
396
- rel_path = stored_raw_path.relative_to(storage_root)
397
- except Exception:
398
- rel_path = Path("videos") / stored_raw_path.name
399
-
400
- if _current_video.raw_file.name != rel_path.as_posix():
401
- _current_video.raw_file.name = rel_path.as_posix()
402
- _current_video.save(update_fields=["raw_file"])
403
- self.logger.info("Updated raw_file path to: %s", rel_path.as_posix())
404
-
405
- # --- Store for later stages ---
406
- self.processing_context["raw_video_path"] = stored_raw_path
407
- self.processing_context["video_filename"] = stored_raw_path.name
408
-
409
- def _setup_processing_environment(self):
410
- """Setup the processing environment without file movement."""
411
- video = self._require_current_video()
412
-
413
- # Initialize video specifications
414
- video.initialize_video_specs()
415
-
416
- # Extract frames BEFORE processing to prevent pipeline 1 conflicts
417
- self.logger.info("Pre-extracting frames to avoid pipeline conflicts...")
418
- try:
419
- frames_extracted = video.extract_frames(overwrite=False)
420
- if frames_extracted:
421
- self.processing_context["frames_extracted"] = True
422
- self.logger.info("Frame extraction completed successfully")
423
- # Initialize frame objects in database
424
- video.initialize_frames(video.get_frame_paths())
425
-
426
- # CRITICAL: Immediately save the frames_extracted state to database
427
- # to prevent refresh_from_db() in pipeline 1 from overriding it
428
- state = video.get_or_create_state()
429
- if not state.frames_extracted:
430
- state.frames_extracted = True
431
- state.save(update_fields=["frames_extracted"])
432
- self.logger.info("Persisted frames_extracted=True to database")
433
- else:
434
- self.logger.warning("Frame extraction failed, but continuing...")
435
- self.processing_context["frames_extracted"] = False
436
- except Exception as e:
437
- self.logger.warning(
438
- f"Frame extraction failed during setup: {e}, but continuing..."
439
- )
440
- self.processing_context["frames_extracted"] = False
441
-
442
- # Ensure default patient data
443
- self._ensure_default_patient_data(video_instance=video)
444
-
445
- self.logger.info("Processing environment setup completed")
446
-
447
- def _process_frames_and_metadata(self):
448
- """Process frames and extract metadata with anonymization."""
449
- # Check frame cleaning availability
450
- frame_cleaning_available, frame_cleaner = (
451
- self._ensure_frame_cleaning_available()
452
- )
453
- video = self._require_current_video()
454
-
455
- raw_file_field = video.raw_file
456
- has_raw_file = isinstance(raw_file_field, FieldFile) and bool(
457
- raw_file_field.name
458
- )
459
-
460
- if not (frame_cleaning_available and has_raw_file):
461
- self.logger.warning(
462
- "Frame cleaning not available or conditions not met, using fallback anonymization."
463
- )
464
- self._fallback_anonymize_video()
465
- return
466
-
467
- try:
468
- self.logger.info(
469
- "Starting frame-level anonymization with processor ROI masking..."
470
- )
471
-
472
- # Get processor ROI information
473
- endoscope_data_roi_nested, endoscope_image_roi = (
474
- self._get_processor_roi_info()
475
- )
476
-
477
- # Perform frame cleaning with timeout to prevent blocking
478
- from concurrent.futures import ThreadPoolExecutor
479
- from concurrent.futures import TimeoutError as FutureTimeoutError
480
-
481
- with ThreadPoolExecutor(max_workers=1) as executor:
482
- future = executor.submit(
483
- self._perform_frame_cleaning,
484
- endoscope_data_roi_nested,
485
- endoscope_image_roi,
486
- )
487
- try:
488
- # Increased timeout to better accommodate ffmpeg + OCR
489
- future.result(timeout=5000)
490
- self.processing_context["anonymization_completed"] = True
491
- self.logger.info(
492
- "Frame cleaning completed successfully within timeout"
493
- )
494
- except FutureTimeoutError:
495
- self.logger.warning(
496
- "Frame cleaning timed out; entering grace period check for cleaned output"
497
- )
498
- # Grace period: detect if cleaned file appears shortly after timeout
499
- grace_seconds = 60
500
- expected_cleaned_path: Optional[Path] = None
501
- processed_field = video.processed_file
502
- if isinstance(processed_field, FieldFile) and processed_field.name:
503
- try:
504
- expected_cleaned_path = Path(processed_field.path)
505
- except (NotImplementedError, TypeError, ValueError):
506
- expected_cleaned_path = None
507
- found = False
508
- if expected_cleaned_path is not None:
509
- for _ in range(grace_seconds):
510
- if expected_cleaned_path.exists():
511
- self.processing_context["cleaned_video_path"] = (
512
- expected_cleaned_path
513
- )
514
- self.processing_context["anonymization_completed"] = (
515
- True
516
- )
517
- self.logger.info(
518
- "Detected cleaned video during grace period: %s",
519
- expected_cleaned_path,
520
- )
521
- found = True
522
- break
523
- time.sleep(1)
524
- else:
525
- self._fallback_anonymize_video()
526
- if not found:
527
- raise TimeoutError(
528
- "Frame cleaning operation timed out - likely Ollama connection issue"
529
- )
530
-
531
- except Exception as e:
532
- self.logger.warning(
533
- "Frame cleaning failed (reason: %s), falling back to simple copy", e
534
- )
535
- # Try fallback anonymization when frame cleaning fails
536
- try:
537
- self._fallback_anonymize_video()
538
- except Exception as fallback_error:
539
- self.logger.error(
540
- "Fallback anonymization also failed: %s", fallback_error
541
- )
542
- # If even fallback fails, mark as not anonymized but continue import
543
- self.processing_context["anonymization_completed"] = False
544
- self.processing_context["error_reason"] = (
545
- f"Frame cleaning failed: {e}, Fallback failed: {fallback_error}"
546
- )
547
-
548
- def _save_anonymized_video(self):
549
- original_raw_file_path_to_delete = None
550
- original_raw_frame_dir_to_delete = None
551
- video = self._require_current_video()
552
- anonymized_video_path = video.get_target_anonymized_video_path()
553
-
554
- if not anonymized_video_path.exists():
555
- raise RuntimeError(
556
- f"Processed video file not found after assembly for {video.uuid}: {anonymized_video_path}"
557
- )
558
-
559
- new_processed_hash = get_video_hash(anonymized_video_path)
560
- if (
561
- video.__class__.objects.filter(processed_video_hash=new_processed_hash)
562
- .exclude(pk=video.pk)
563
- .exists()
564
- ):
565
- raise ValueError(
566
- f"Processed video hash {new_processed_hash} already exists for another video (Video: {video.uuid})."
567
- )
568
-
569
- video.processed_video_hash = new_processed_hash
570
- video.processed_file.name = anonymized_video_path.relative_to(
571
- STORAGE_DIR
572
- ).as_posix()
573
-
574
- update_fields = [
575
- "processed_video_hash",
576
- "processed_file",
577
- "frame_dir",
578
- ]
579
-
580
- if self.delete_source:
581
- original_raw_file_path_to_delete = video.get_raw_file_path()
582
- original_raw_frame_dir_to_delete = video.get_frame_dir_path()
583
-
584
- video.raw_file.name = ""
585
-
586
- update_fields.extend(["raw_file", "video_hash"])
587
-
588
- transaction.on_commit(
589
- lambda: _cleanup_raw_assets(
590
- video_uuid=video.uuid,
591
- raw_file_path=original_raw_file_path_to_delete,
592
- raw_frame_dir=original_raw_frame_dir_to_delete,
593
- )
594
- )
595
-
596
- video.save(update_fields=update_fields)
597
- if not isinstance(video.state, VideoState):
598
- try:
599
- video.get_or_create_state()
600
- except ValueError as e:
601
- raise RuntimeError(
602
- f"Video state not found for video {video.uuid}. Error {e}"
603
- )
604
-
605
- else:
606
- video.state.mark_anonymized(save=True)
607
- video.refresh_from_db()
608
- self.current_video = video
609
-
610
- return True
611
-
612
- def _fallback_anonymize_video(self):
613
- """
614
- Fallback to create anonymized video if lx_anonymizer is not available.
615
- """
616
- try:
617
- self.logger.info("Attempting fallback video anonymization...")
618
- video = self.current_video
619
- if video is None:
620
- self.logger.warning(
621
- "No VideoFile instance available for fallback anonymization"
622
- )
623
-
624
- # Strategy 2: Simple copy (no processing, just copy raw to processed)
625
- self.logger.info(
626
- "Using simple copy fallback (raw video will be used as 'processed' video)"
627
- )
628
- self.processing_context["anonymization_completed"] = False
629
- self.processing_context["use_raw_as_processed"] = True
630
- self.logger.warning(
631
- "Fallback: Video will be imported without anonymization (raw copy used)"
632
- )
633
- except Exception as e:
634
- self.logger.error(
635
- f"Error during fallback anonymization: {e}", exc_info=True
636
- )
637
- self.processing_context["anonymization_completed"] = False
638
- self.processing_context["error_reason"] = str(e)
639
-
640
- def _finalize_processing(self):
641
- """Finalize processing and update video state."""
642
- self.logger.info("Updating video processing state...")
643
-
644
- with transaction.atomic():
645
- video = self._require_current_video()
646
- try:
647
- video.refresh_from_db()
648
- except Exception as refresh_error:
649
- self.logger.warning(
650
- "Could not refresh VideoFile %s from DB: %s",
651
- video.uuid,
652
- refresh_error,
653
- )
654
-
655
- state = video.get_or_create_state()
656
-
657
- # Only mark frames as extracted if they were successfully extracted
658
- if self.processing_context.get("frames_extracted", False):
659
- state.frames_extracted = True
660
- self.logger.info("Marked frames as extracted in state")
661
- else:
662
- self.logger.warning("Frames were not extracted, not updating state")
663
-
664
- # Always mark these as true (metadata extraction attempts were made)
665
- state.frames_initialized = True
666
- state.video_meta_extracted = True
667
- state.text_meta_extracted = True
668
-
669
- # ✅ FIX: Only mark as processed if anonymization actually completed
670
- anonymization_completed = self.processing_context.get(
671
- "anonymization_completed", False
672
- )
673
- if anonymization_completed:
674
- state.mark_sensitive_meta_processed(save=False)
675
- self.logger.info(
676
- "Anonymization completed - marking sensitive meta as processed"
677
- )
678
- else:
679
- self.logger.warning(
680
- f"Anonymization NOT completed - NOT marking as processed. Reason: {self.processing_context.get('error_reason', 'Unknown')}"
681
- )
682
- # Explicitly mark as NOT processed
683
- state.sensitive_meta_processed = False
684
-
685
- # Save all state changes
686
- state.save()
687
- self.logger.info("Video processing state updated")
688
-
689
- # Signal completion
690
- self._signal_completion()
691
-
692
- def _cleanup_and_archive(self):
693
- """Move processed video to anonym_videos and cleanup."""
694
- from endoreg_db.utils import data_paths
695
-
696
- anonym_videos_dir = data_paths["anonym_video"] # /data/anonym_videos
697
- anonym_videos_dir.mkdir(parents=True, exist_ok=True)
698
-
699
- video = self._require_current_video()
700
-
701
- processed_video_path = None
702
- if "cleaned_video_path" in self.processing_context:
703
- processed_video_path = self.processing_context["cleaned_video_path"]
704
- else:
705
- raw_video_path = self.processing_context.get("raw_video_path")
706
- if raw_video_path and Path(raw_video_path).exists():
707
- # Use UUID-based naming to avoid conflicts
708
- suffix = Path(raw_video_path).suffix or ".mp4"
709
- processed_filename = f"processed_{video.uuid}{suffix}"
710
- processed_video_path = Path(raw_video_path).parent / processed_filename
711
- try:
712
- shutil.copy2(str(raw_video_path), str(processed_video_path))
713
- self.logger.info(
714
- "Copied raw video for processing: %s", processed_video_path
715
- )
716
- except Exception as exc:
717
- self.logger.error("Failed to copy raw video: %s", exc)
718
- processed_video_path = None
719
-
720
- if processed_video_path and Path(processed_video_path).exists():
721
- try:
722
- ext = Path(processed_video_path).suffix or ".mp4"
723
- anonym_video_filename = f"anonym_{video.uuid}{ext}"
724
- anonym_target_path = anonym_videos_dir / anonym_video_filename
725
-
726
- shutil.move(str(processed_video_path), str(anonym_target_path))
727
- self.logger.info("Moved processed video to: %s", anonym_target_path)
728
-
729
- if anonym_target_path.exists():
730
- try:
731
- storage_root = data_paths["storage"]
732
- relative_path = anonym_target_path.relative_to(storage_root)
733
- video.processed_file.name = str(relative_path)
734
- video.save(update_fields=["processed_file"])
735
- self.logger.info(
736
- "Updated processed_file path to: %s", relative_path
737
- )
738
- except Exception as exc:
739
- self.logger.error(
740
- "Failed to update processed_file path: %s", exc
741
- )
742
- video.processed_file.name = (
743
- f"anonym_videos/{anonym_video_filename}"
744
- )
745
- video.save(update_fields=["processed_file"])
746
- self.logger.info(
747
- "Updated processed_file path using fallback: %s",
748
- f"anonym_videos/{anonym_video_filename}",
749
- )
750
-
751
- self.processing_context["anonymization_completed"] = True
752
- else:
753
- self.logger.warning(
754
- "Processed video file not found after move: %s",
755
- anonym_target_path,
756
- )
757
- except Exception as exc:
758
- self.logger.error(
759
- "Failed to move processed video to anonym_videos: %s", exc
760
- )
761
- else:
762
- self.logger.warning(
763
- "No processed video available - processed_file will remain empty"
764
- )
765
-
766
- try:
767
- from endoreg_db.utils.paths import RAW_FRAME_DIR
768
-
769
- shutil.rmtree(RAW_FRAME_DIR, ignore_errors=True)
770
- self.logger.debug(
771
- "Cleaned up temporary frames directory: %s", RAW_FRAME_DIR
772
- )
773
- except Exception as exc:
774
- self.logger.warning("Failed to remove directory %s: %s", RAW_FRAME_DIR, exc)
775
-
776
- source_path = self.processing_context["file_path"]
777
- if self.processing_context["delete_source"] and Path(source_path).exists():
778
- try:
779
- os.remove(source_path)
780
- self.logger.info("Removed remaining source file: %s", source_path)
781
- except Exception as exc:
782
- self.logger.warning(
783
- "Failed to remove source file %s: %s", source_path, exc
784
- )
785
-
786
- if not video.processed_file or not storage_file_exists(video.processed_file):
787
- self.logger.warning(
788
- "No processed_file found after cleanup - video will be unprocessed"
789
- )
790
- try:
791
- video.anonymize(delete_original_raw=self.delete_source)
792
- video.save(update_fields=["processed_file"])
793
- self.logger.info("Late-stage anonymization succeeded")
794
- except Exception as e:
795
- self.logger.error("Late-stage anonymization failed: %s", e)
796
- self.processing_context["anonymization_completed"] = False
797
-
798
- self.logger.info("Cleanup and archiving completed")
799
-
800
- self.processed_files.add(str(self.processing_context["file_path"]))
801
-
802
- with transaction.atomic():
803
- video.refresh_from_db()
804
- if hasattr(video, "state") and self.processing_context.get(
805
- "anonymization_completed"
806
- ):
807
- if not isinstance(video.state, VideoState):
808
- try:
809
- video.get_or_create_state()
810
- except:
811
- raise RuntimeError(
812
- f"Video state not found for video {video.uuid}"
813
- )
814
-
815
- video.state.mark_sensitive_meta_processed(save=True)
816
-
817
- self.logger.info(
818
- "Import and anonymization completed for VideoFile UUID: %s", video.uuid
819
- )
820
- self.logger.info("Raw video stored in: /data/videos")
821
- self.logger.info("Processed video stored in: /data/anonym_videos")
822
-
823
- def _create_sensitive_file(
824
- self,
825
- video_instance: VideoFile | None = None,
826
- file_path: Path | str | None = None,
827
- ) -> Path:
828
- """Create or move a sensitive copy of the raw video file inside storage."""
829
-
830
- video = video_instance or self._require_current_video()
831
- raw_field: FieldFile | None = getattr(video, "raw_file", None)
832
-
833
- def copy_into_sensitive(source: Path) -> Path:
834
- target_dir = SENSITIVE_VIDEO_DIR
835
- if not target_dir.exists():
836
- self.logger.info("Creating sensitive file directory: %s", target_dir)
837
- os.makedirs(target_dir, exist_ok=True)
838
-
839
- target_name = source.name or "raw_video"
840
- target_file_path = target_dir / target_name
841
-
842
- if source != target_file_path:
843
- try:
844
- shutil.copy2(source, target_file_path)
845
- self.logger.info(
846
- "Copied raw file to sensitive directory: %s", target_file_path
847
- )
848
- except Exception as exc:
849
- self.logger.warning(
850
- "Failed to copy raw file to sensitive dir: %s", exc
851
- )
852
- shutil.copy(source, target_file_path)
853
- self.logger.info(
854
- "Fallback copy succeeded for sensitive directory: %s",
855
- target_file_path,
856
- )
857
- else:
858
- self.logger.debug(
859
- "Source path already in sensitive directory: %s",
860
- target_file_path,
861
- )
862
-
863
- return target_file_path
864
-
865
- target_file_path: Path | None = None
866
-
867
- # Prefer an on-disk path from the FieldFile when available
868
- if raw_field:
869
- try:
870
- local_candidate = Path(raw_field.path)
871
- if local_candidate.exists():
872
- target_file_path = copy_into_sensitive(local_candidate)
873
- except Exception:
874
- target_file_path = None
875
-
876
- if target_file_path is None and storage_file_exists(raw_field):
877
- try:
878
- with ensure_local_file(raw_field) as temp_source:
879
- target_file_path = copy_into_sensitive(Path(temp_source))
880
- except Exception as exc:
881
- self.logger.warning(
882
- "Failed to download raw_field for sensitive copy: %s", exc
883
- )
884
-
885
- if target_file_path is None and file_path is not None:
886
- file_candidate = Path(file_path)
887
- if file_candidate.exists():
888
- target_file_path = copy_into_sensitive(file_candidate)
889
-
890
- if target_file_path is None:
891
- context_path = self.processing_context.get("raw_video_path")
892
- if context_path:
893
- context_candidate = Path(context_path)
894
- if context_candidate.exists():
895
- target_file_path = copy_into_sensitive(context_candidate)
896
-
897
- if target_file_path is None:
898
- raise ValueError("No file path available for creating sensitive file")
899
- if not raw_field:
900
- raise ValueError(
901
- "VideoFile must have a raw_file to create a sensitive file"
902
- )
903
-
904
- try:
905
- from endoreg_db.utils import data_paths
906
-
907
- storage_root = data_paths["storage"]
908
- relative_path = target_file_path.relative_to(storage_root)
909
- video.raw_file.name = relative_path.as_posix()
910
- video.save(update_fields=["raw_file"])
911
- self.logger.info(
912
- "Updated video.raw_file to point to sensitive location: %s",
913
- relative_path,
914
- )
915
- except Exception as exc:
916
- self.logger.warning("Failed to set relative path, using fallback: %s", exc)
917
- video.raw_file.name = f"videos/sensitive/{target_file_path.name}"
918
- video.save(update_fields=["raw_file"])
919
- self.logger.info(
920
- "Updated video.raw_file using fallback method: videos/sensitive/%s",
921
- target_file_path.name,
922
- )
923
-
924
- self.processing_context["raw_video_path"] = target_file_path
925
- self.processing_context["video_filename"] = target_file_path.name
926
-
927
- self.logger.info(
928
- "Created sensitive file for %s at %s", video.uuid, target_file_path
929
- )
930
- return target_file_path
931
-
932
def _get_processor_roi_info(
    self,
) -> Tuple[Optional[List[List[Dict[str, Any]]]], Optional[Dict[str, Any]]]:
    """Collect the ROI definitions used for masking the current video.

    The processor is looked up via ``video.video_meta.processor``. Any
    failure while querying it is logged and leaves the affected value(s)
    as ``None`` instead of aborting the import.

    Returns:
        A ``(sensitive_rois, image_roi)`` tuple. When the processor reports
        its sensitive ROIs as a dict, each dict value becomes a one-element
        list, each ``None`` value becomes an empty list, and values of any
        other type are dropped.
    """
    video = self._require_current_video()
    image_roi = None
    sensitive_rois = None

    try:
        meta = getattr(video, "video_meta", None)
        processor = getattr(meta, "processor", None) if meta else None
        if not processor:
            self.logger.warning(
                "No processor found for video %s, proceeding without ROI masking",
                video.uuid,
            )
        else:
            image_roi = processor.get_roi_endoscope_image()
            sensitive_rois = processor.get_sensitive_rois()
            self.logger.info(
                "Retrieved processor ROI information: endoscope_image_roi=%s",
                image_roi,
            )
    except Exception as exc:
        self.logger.error("Failed to retrieve processor ROI information: %s", exc)

    # Normalise a dict of ROIs into the nested-list shape of the return type.
    if isinstance(sensitive_rois, dict):
        sensitive_rois = [
            [entry] if isinstance(entry, dict) else []
            for entry in sensitive_rois.values()
            if entry is None or isinstance(entry, dict)
        ]

    return sensitive_rois, image_roi
971
-
972
def _ensure_default_patient_data(
    self, video_instance: VideoFile | None = None
) -> None:
    """Make sure the video carries a SensitiveMeta record.

    Args:
        video_instance: Video to inspect; falls back to the service's
            current video when omitted.

    If the video already has a SensitiveMeta, its state is flagged as
    "sensitive meta processed". Otherwise a placeholder record is created
    from fixed default values and attached to the video; a failure to
    create it is logged and swallowed.
    """
    video = video_instance or self._require_current_video()

    if getattr(video, "sensitive_meta", None):
        # Metadata is already present: only record that fact on the state.
        video.get_or_create_state().mark_sensitive_meta_processed(save=True)
        return

    self.logger.info(
        "No SensitiveMeta found for video %s, creating default", video.uuid
    )
    placeholder = {
        "patient_first_name": "Patient",
        "patient_last_name": "Unknown",
        "patient_dob": date(1990, 1, 1),
        "examination_date": date.today(),
        "center_name": (
            video.center.name if video.center else "university_hospital_wuerzburg"
        ),
    }
    try:
        video.sensitive_meta = SensitiveMeta.create_from_dict(placeholder)
        video.save(update_fields=["sensitive_meta"])
        self.logger.info("Created default SensitiveMeta for video %s", video.uuid)
    except Exception as exc:
        self.logger.error(
            "Failed to create default SensitiveMeta for video %s: %s",
            video.uuid,
            exc,
        )
1010
-
1011
def _ensure_frame_cleaning_available(self):
    """Import lx_anonymizer's FrameCleaner and build an instance of it.

    A failed import is reported to the caller through the return value
    rather than by raising, so callers can test the flag and decide how to
    proceed. Errors raised while instantiating ``FrameCleaner`` itself are
    not caught here.

    Returns:
        Tuple of ``(availability_flag, frame_cleaner)``. On a failed import
        this is ``(False, None)``; otherwise ``(True, <FrameCleaner>)``.
    """
    try:
        from lx_anonymizer import FrameCleaner
    except Exception as e:
        self.logger.warning(
            "Frame cleaning not available: %s Please install or update lx_anonymizer.",
            e,
        )
        # Report unavailability instead of falling through to an assert.
        return False, None

    return True, FrameCleaner()
1032
-
1033
def _perform_frame_cleaning(self, endoscope_data_roi_nested, endoscope_image_roi):
    """Run the frame cleaner over the raw video and record its results.

    The cleaned video is written next to the raw video as
    ``cleaned_<video uuid><suffix>``. The path returned by the cleaner and
    the metadata it extracted are stored in ``self.processing_context``
    under ``"cleaned_video_path"`` and ``"extracted_metadata"``, and the
    metadata is forwarded to ``_update_sensitive_metadata``.

    Args:
        endoscope_data_roi_nested: Sensitive ROIs passed through to the cleaner.
        endoscope_image_roi: Endoscope image ROI passed through to the cleaner.

    Raises:
        RuntimeError: If frame cleaning is unavailable or no raw video path
            can be determined.
    """
    is_available, frame_cleaner = self._ensure_frame_cleaning_available()
    if not is_available or frame_cleaner is None:
        raise RuntimeError("Frame cleaning not available")

    raw_video_path = self.processing_context.get("raw_video_path")

    # Fall back to the path stored on the video when the context has none
    # or the file it points to no longer exists.
    if not raw_video_path or not Path(raw_video_path).exists():
        try:
            self.current_video = self._require_current_video()
            raw_video_path = self.current_video.get_raw_file_path()
        except Exception as exc:
            # Chain the cause so the underlying failure stays visible.
            raise RuntimeError(
                f"Raw video path not found: {raw_video_path}"
            ) from exc

    video = self._require_current_video()
    if not raw_video_path:
        raise RuntimeError(
            "raw_video_path is None, cannot construct cleaned_video_path"
        )

    # The UUID-based name avoids collisions between videos in one directory.
    source_path = Path(raw_video_path)
    cleaned_filename = f"cleaned_{video.uuid}{source_path.suffix or '.mp4'}"
    cleaned_video_path = source_path.parent / cleaned_filename
    self.logger.debug("Using UUID-based cleaned filename: %s", cleaned_filename)

    # Clean video with ROI masking (heavy I/O operation)
    actual_cleaned_path, extracted_metadata = frame_cleaner.clean_video(
        video_path=source_path,
        endoscope_image_roi=endoscope_image_roi,
        endoscope_data_roi_nested=endoscope_data_roi_nested,
        output_path=cleaned_video_path,
        technique="mask_overlay",
    )

    # Kept in the context for later pipeline steps.
    self.processing_context["cleaned_video_path"] = actual_cleaned_path
    self.processing_context["extracted_metadata"] = extracted_metadata

    self._update_sensitive_metadata(extracted_metadata)
    # NOTE(review): extracted_metadata presumably contains patient
    # identifiers; confirm that logging it at INFO level is acceptable.
    self.logger.info(
        "Extracted metadata from frame cleaning: %s", extracted_metadata
    )
    self.logger.info(
        "Frame cleaning with ROI masking completed: %s", actual_cleaned_path
    )
    self.logger.info("Cleaned video will be moved to anonym_videos during cleanup")
1086
-
1087
def _update_sensitive_metadata(self, extracted_metadata: Dict[str, Any]):
    """Apply extracted metadata to the current video's SensitiveMeta.

    Does nothing when the video has no SensitiveMeta or when
    ``extracted_metadata`` is empty. If the SensitiveMeta has no center
    yet, one is added to the update: ``video.center`` when set, otherwise
    the Center looked up by the ``"center_name"`` entry. An unknown center
    name aborts the update. After a successful save the video state is
    flagged as "sensitive meta processed".

    Args:
        extracted_metadata (Dict[str, Any]): Extracted metadata to update.

    Raises:
        Exception: Whatever saving the SensitiveMeta or updating the state
            raises is logged and re-raised.
    """
    video = self._require_current_video()
    sm = getattr(video, "sensitive_meta", None)

    if not (sm and extracted_metadata):
        return

    # Work on a copy so the caller's dict is left untouched.
    metadata_to_update = extracted_metadata.copy()

    # Supply a Center object (not just its name) when none is set yet.
    if not getattr(sm, "center", None):
        if video.center:
            metadata_to_update["center"] = video.center
            self.logger.debug(
                "Added center object '%s' to metadata for SensitiveMeta update",
                video.center.name,
            )
        else:
            center_name = metadata_to_update.get("center_name")
            if center_name:
                # Imported before the try block so that the except clause
                # below can always resolve ``Center``.
                from ...models.administration import Center

                try:
                    center_obj = Center.objects.get(name=center_name)
                except Center.DoesNotExist:
                    self.logger.error(
                        "Center '%s' not found in database", center_name
                    )
                    return
                metadata_to_update["center"] = center_obj
                self.logger.debug(
                    "Loaded center object '%s' from center_name", center_name
                )
                metadata_to_update.pop("center_name", None)

    try:
        sm.update_from_dict(metadata_to_update)
    except KeyError as e:
        self.logger.warning("Failed to update SensitiveMeta field %s", e)
        return

    # Only the originally extracted keys are reported in the log.
    updated_fields = list(extracted_metadata.keys())
    if not updated_fields:
        self.logger.info(
            "No SensitiveMeta fields updated for video %s - all existing values preserved",
            video.uuid,
        )
        return

    try:
        # Full save: no update_fields, so every changed field is persisted.
        sm.save()
        self.logger.info(
            "Updated SensitiveMeta fields for video %s: %s",
            video.uuid,
            updated_fields,
        )

        state = video.get_or_create_state()
        state.mark_sensitive_meta_processed(save=True)
        self.logger.info(
            "Marked sensitive metadata as processed for video %s", video.uuid
        )
    except Exception as e:
        self.logger.error("Failed to save SensitiveMeta: %s", e)
        raise  # Re-raise to trigger fallback in calling method
1162
-
1163
def _signal_completion(self):
    """Flag the current video as fully processed when all parts are present.

    A video counts as complete when it has a SensitiveMeta, a VideoMeta and
    a raw file that exists in storage. In that case every completion
    attribute the video actually has is set to ``True`` and saved. Any
    exception raised along the way is logged as a warning and suppressed.
    """
    try:
        video = self._require_current_video()

        raw_file: FieldFile | None = getattr(video, "raw_file", None)
        raw_present = storage_file_exists(raw_file)

        is_complete = (
            video.sensitive_meta is not None
            and video.video_meta is not None
            and raw_present
        )
        if not is_complete:
            self.logger.warning(
                "Video %s processing incomplete - missing required components",
                video.uuid,
            )
            return

        self.logger.info(
            "Video %s processing completed successfully - ready for validation",
            video.uuid,
        )

        # Only touch flags that exist on this model.
        flags_set = []
        for flag in ("import_completed", "processing_complete", "ready_for_validation"):
            if not hasattr(video, flag):
                continue
            setattr(video, flag, True)
            flags_set.append(flag)

        if flags_set:
            video.save(update_fields=flags_set)
            self.logger.info("Updated completion flags: %s", flags_set)

    except Exception as e:
        self.logger.warning(f"Failed to signal completion status: {e}")
1205
-
1206
def _cleanup_on_error(self):
    """Best-effort cleanup after a failed import.

    Steps, each of which logs and continues on failure so that cleanup
    never masks the error that triggered it:

    1. Make sure the current video has a state object.
    2. If the original file still exists, copy the video's raw file back
       over it.
    3. If processing had started, reset the state's extraction flags.

    Nothing is done when there is no current video or it has no ``state``
    attribute.
    """
    video = self.current_video
    if not video or not hasattr(video, "state"):
        return

    if video.state is None:
        try:
            video.state = video.get_or_create_state()
        except Exception as e:
            self.logger.warning(
                "Video state not found for video %s during error cleanup %s",
                video.uuid,
                e,
            )
            return

    original_file_path = self.original_file_path
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``.
    if original_file_path is not None and not Path(original_file_path).exists():
        self.logger.warning("Original file path does not exist")
    else:
        if original_file_path is None:
            self.logger.warning("Original file path is None")
        # NOTE(review): no state field is changed at this point; confirm
        # whether this message is still accurate.
        self.logger.info("Marked video import as failed in state")
        try:
            # FieldFile.path raises ValueError when no file is attached.
            raw_file_path = getattr(video.raw_file, "path", None)
            if raw_file_path and original_file_path:
                shutil.copy2(str(raw_file_path), str(original_file_path))
            else:
                self.logger.warning(
                    "Cannot restore original raw file: path is None"
                )
        except (OSError, ValueError) as e:
            self.logger.warning("Failed to restore original raw file: %s", e)

    try:
        if not isinstance(video.state, VideoState):
            self.logger.error("Current video is none after Assertion for Video File")
            return

        if self.processing_context.get("processing_started"):
            video.state.frames_extracted = False
            video.state.frames_initialized = False
            video.state.video_meta_extracted = False
            video.state.text_meta_extracted = False
            video.state.save()
    except Exception as e:
        self.logger.warning("Error during cleanup: %s", e)
1248
-
1249
def _cleanup_processing_context(self):
    """Release the file lock and reset per-import state.

    Intended to run from the ``finally`` block of import_and_anonymize(),
    so the lock is released even when processing fails. If anonymization
    did not complete, the file is also removed from ``self.processed_files``
    so it can be picked up again. Errors are logged, never raised; the
    current video and the processing context are always reset at the end.
    """
    # The context may be missing if setup failed very early.
    if not hasattr(self, "processing_context"):
        self.processing_context = {}

    try:
        ctx = self.processing_context

        lock = ctx.get("_lock_context")
        if lock is not None:
            try:
                lock.__exit__(None, None, None)
                self.logger.info("Released file lock")
            except Exception as e:
                self.logger.warning(f"Error releasing file lock: {e}")

        source = ctx.get("file_path")
        if source and not ctx.get("anonymization_completed"):
            file_path_str = str(source)
            if file_path_str in self.processed_files:
                self.processed_files.remove(file_path_str)
                self.logger.info(
                    f"Removed {file_path_str} from processed files (failed processing)"
                )

    except Exception as e:
        self.logger.warning(f"Error during context cleanup: {e}")
    finally:
        self.current_video = None
        self.processing_context = {}
1286
-
1287
-
1288
- # Convenience function for callers/tests that expect a module-level import_and_anonymize
1289
def import_and_anonymize(
    file_path,
    center_name: str,
    processor_name: str,
    save_video: bool = True,
    delete_source: bool = True,
) -> VideoFile | None:
    """Run a one-off import through a fresh VideoImportService.

    Backward-compatible entry point for callers that import this function
    from the module instead of using the service class. All arguments are
    forwarded unchanged to ``VideoImportService.import_and_anonymize`` and
    its result is returned.
    """
    forwarded = {
        "file_path": file_path,
        "center_name": center_name,
        "processor_name": processor_name,
        "save_video": save_video,
        "delete_source": delete_source,
    }
    return VideoImportService().import_and_anonymize(**forwarded)