endoreg-db 0.8.8.9__py3-none-any.whl → 0.8.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (453)
  1. endoreg_db/admin.py +10 -5
  2. endoreg_db/apps.py +4 -7
  3. endoreg_db/authz/auth.py +1 -0
  4. endoreg_db/authz/backends.py +1 -1
  5. endoreg_db/authz/management/commands/list_routes.py +2 -0
  6. endoreg_db/authz/middleware.py +8 -7
  7. endoreg_db/authz/permissions.py +21 -10
  8. endoreg_db/authz/policy.py +14 -19
  9. endoreg_db/authz/views_auth.py +14 -10
  10. endoreg_db/codemods/rename_datetime_fields.py +8 -1
  11. endoreg_db/exceptions.py +5 -2
  12. endoreg_db/forms/__init__.py +0 -1
  13. endoreg_db/forms/examination_form.py +4 -3
  14. endoreg_db/forms/patient_finding_intervention_form.py +30 -8
  15. endoreg_db/forms/patient_form.py +9 -13
  16. endoreg_db/forms/questionnaires/__init__.py +1 -1
  17. endoreg_db/forms/settings/__init__.py +4 -1
  18. endoreg_db/forms/unit.py +2 -1
  19. endoreg_db/helpers/count_db.py +17 -14
  20. endoreg_db/helpers/default_objects.py +2 -1
  21. endoreg_db/helpers/download_segmentation_model.py +4 -3
  22. endoreg_db/helpers/interact.py +0 -5
  23. endoreg_db/helpers/test_video_helper.py +33 -25
  24. endoreg_db/import_files/__init__.py +1 -1
  25. endoreg_db/import_files/context/__init__.py +1 -1
  26. endoreg_db/import_files/context/default_sensitive_meta.py +11 -9
  27. endoreg_db/import_files/context/ensure_center.py +4 -4
  28. endoreg_db/import_files/context/file_lock.py +3 -3
  29. endoreg_db/import_files/context/import_context.py +11 -12
  30. endoreg_db/import_files/context/validate_directories.py +1 -0
  31. endoreg_db/import_files/file_storage/create_report_file.py +57 -34
  32. endoreg_db/import_files/file_storage/create_video_file.py +64 -35
  33. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +5 -2
  34. endoreg_db/import_files/file_storage/state_management.py +146 -83
  35. endoreg_db/import_files/file_storage/storage.py +5 -1
  36. endoreg_db/import_files/processing/report_processing/report_anonymization.py +24 -19
  37. endoreg_db/import_files/processing/sensitive_meta_adapter.py +3 -3
  38. endoreg_db/import_files/processing/video_processing/video_anonymization.py +18 -18
  39. endoreg_db/import_files/pseudonymization/k_anonymity.py +8 -9
  40. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +16 -5
  41. endoreg_db/import_files/report_import_service.py +36 -30
  42. endoreg_db/import_files/video_import_service.py +27 -23
  43. endoreg_db/logger_conf.py +56 -40
  44. endoreg_db/management/__init__.py +1 -1
  45. endoreg_db/management/commands/__init__.py +1 -1
  46. endoreg_db/management/commands/check_auth.py +45 -38
  47. endoreg_db/management/commands/create_model_meta_from_huggingface.py +53 -2
  48. endoreg_db/management/commands/create_multilabel_model_meta.py +54 -19
  49. endoreg_db/management/commands/fix_missing_patient_data.py +105 -71
  50. endoreg_db/management/commands/fix_video_paths.py +75 -54
  51. endoreg_db/management/commands/import_report.py +1 -3
  52. endoreg_db/management/commands/list_routes.py +2 -0
  53. endoreg_db/management/commands/load_ai_model_data.py +8 -2
  54. endoreg_db/management/commands/load_ai_model_label_data.py +0 -1
  55. endoreg_db/management/commands/load_center_data.py +3 -3
  56. endoreg_db/management/commands/load_distribution_data.py +35 -38
  57. endoreg_db/management/commands/load_endoscope_data.py +0 -3
  58. endoreg_db/management/commands/load_examination_data.py +20 -4
  59. endoreg_db/management/commands/load_finding_data.py +18 -3
  60. endoreg_db/management/commands/load_gender_data.py +17 -24
  61. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +95 -85
  62. endoreg_db/management/commands/load_information_source.py +0 -3
  63. endoreg_db/management/commands/load_lab_value_data.py +14 -3
  64. endoreg_db/management/commands/load_legacy_data.py +303 -0
  65. endoreg_db/management/commands/load_name_data.py +1 -2
  66. endoreg_db/management/commands/load_pdf_type_data.py +4 -8
  67. endoreg_db/management/commands/load_profession_data.py +0 -1
  68. endoreg_db/management/commands/load_report_reader_flag_data.py +0 -4
  69. endoreg_db/management/commands/load_requirement_data.py +6 -2
  70. endoreg_db/management/commands/load_unit_data.py +0 -4
  71. endoreg_db/management/commands/load_user_groups.py +5 -7
  72. endoreg_db/management/commands/model_input.py +169 -0
  73. endoreg_db/management/commands/register_ai_model.py +22 -16
  74. endoreg_db/management/commands/setup_endoreg_db.py +110 -32
  75. endoreg_db/management/commands/storage_management.py +14 -8
  76. endoreg_db/management/commands/summarize_db_content.py +154 -63
  77. endoreg_db/management/commands/train_image_multilabel_model.py +144 -0
  78. endoreg_db/management/commands/validate_video_files.py +82 -50
  79. endoreg_db/management/commands/video_validation.py +4 -6
  80. endoreg_db/migrations/0001_initial.py +112 -63
  81. endoreg_db/migrations/__init__.py +0 -0
  82. endoreg_db/models/__init__.py +8 -0
  83. endoreg_db/models/administration/ai/active_model.py +5 -5
  84. endoreg_db/models/administration/ai/ai_model.py +41 -18
  85. endoreg_db/models/administration/ai/model_type.py +1 -0
  86. endoreg_db/models/administration/case/case.py +22 -22
  87. endoreg_db/models/administration/center/__init__.py +5 -5
  88. endoreg_db/models/administration/center/center.py +6 -2
  89. endoreg_db/models/administration/center/center_resource.py +18 -4
  90. endoreg_db/models/administration/center/center_shift.py +3 -1
  91. endoreg_db/models/administration/center/center_waste.py +6 -2
  92. endoreg_db/models/administration/person/__init__.py +1 -1
  93. endoreg_db/models/administration/person/employee/__init__.py +1 -1
  94. endoreg_db/models/administration/person/employee/employee_type.py +3 -1
  95. endoreg_db/models/administration/person/examiner/__init__.py +1 -1
  96. endoreg_db/models/administration/person/examiner/examiner.py +10 -2
  97. endoreg_db/models/administration/person/names/first_name.py +6 -4
  98. endoreg_db/models/administration/person/names/last_name.py +4 -3
  99. endoreg_db/models/administration/person/patient/__init__.py +1 -1
  100. endoreg_db/models/administration/person/patient/patient.py +0 -1
  101. endoreg_db/models/administration/person/patient/patient_external_id.py +0 -1
  102. endoreg_db/models/administration/person/person.py +1 -1
  103. endoreg_db/models/administration/product/__init__.py +7 -6
  104. endoreg_db/models/administration/product/product.py +6 -2
  105. endoreg_db/models/administration/product/product_group.py +9 -7
  106. endoreg_db/models/administration/product/product_material.py +9 -2
  107. endoreg_db/models/administration/product/reference_product.py +64 -15
  108. endoreg_db/models/administration/qualification/qualification.py +3 -1
  109. endoreg_db/models/administration/shift/shift.py +3 -1
  110. endoreg_db/models/administration/shift/shift_type.py +12 -4
  111. endoreg_db/models/aidataset/__init__.py +5 -0
  112. endoreg_db/models/aidataset/aidataset.py +193 -0
  113. endoreg_db/models/label/__init__.py +1 -1
  114. endoreg_db/models/label/label.py +10 -2
  115. endoreg_db/models/label/label_set.py +3 -1
  116. endoreg_db/models/label/label_video_segment/_create_from_video.py +6 -2
  117. endoreg_db/models/label/label_video_segment/label_video_segment.py +148 -44
  118. endoreg_db/models/media/__init__.py +12 -5
  119. endoreg_db/models/media/frame/__init__.py +1 -1
  120. endoreg_db/models/media/frame/frame.py +34 -8
  121. endoreg_db/models/media/pdf/__init__.py +2 -1
  122. endoreg_db/models/media/pdf/raw_pdf.py +11 -4
  123. endoreg_db/models/media/pdf/report_file.py +6 -2
  124. endoreg_db/models/media/pdf/report_reader/__init__.py +3 -3
  125. endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +15 -5
  126. endoreg_db/models/media/video/create_from_file.py +20 -41
  127. endoreg_db/models/media/video/pipe_1.py +75 -30
  128. endoreg_db/models/media/video/pipe_2.py +37 -12
  129. endoreg_db/models/media/video/video_file.py +36 -24
  130. endoreg_db/models/media/video/video_file_ai.py +235 -70
  131. endoreg_db/models/media/video/video_file_anonymize.py +240 -65
  132. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -1
  133. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +3 -1
  134. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +30 -9
  135. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +95 -29
  136. endoreg_db/models/media/video/video_file_frames/_get_frame.py +13 -3
  137. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -1
  138. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +15 -3
  139. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +15 -3
  140. endoreg_db/models/media/video/video_file_frames/_get_frames.py +7 -2
  141. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +109 -23
  142. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +111 -27
  143. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +46 -13
  144. endoreg_db/models/media/video/video_file_io.py +85 -33
  145. endoreg_db/models/media/video/video_file_meta/__init__.py +6 -6
  146. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +17 -4
  147. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +28 -7
  148. endoreg_db/models/media/video/video_file_meta/get_fps.py +46 -13
  149. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +81 -20
  150. endoreg_db/models/media/video/video_file_meta/text_meta.py +61 -20
  151. endoreg_db/models/media/video/video_file_meta/video_meta.py +40 -12
  152. endoreg_db/models/media/video/video_file_segments.py +118 -27
  153. endoreg_db/models/media/video/video_metadata.py +25 -6
  154. endoreg_db/models/media/video/video_processing.py +54 -15
  155. endoreg_db/models/medical/__init__.py +3 -13
  156. endoreg_db/models/medical/contraindication/__init__.py +3 -1
  157. endoreg_db/models/medical/disease.py +18 -6
  158. endoreg_db/models/medical/event.py +6 -2
  159. endoreg_db/models/medical/examination/__init__.py +5 -1
  160. endoreg_db/models/medical/examination/examination.py +22 -6
  161. endoreg_db/models/medical/examination/examination_indication.py +23 -7
  162. endoreg_db/models/medical/examination/examination_time.py +6 -2
  163. endoreg_db/models/medical/finding/__init__.py +3 -1
  164. endoreg_db/models/medical/finding/finding.py +37 -12
  165. endoreg_db/models/medical/finding/finding_classification.py +27 -8
  166. endoreg_db/models/medical/finding/finding_intervention.py +19 -6
  167. endoreg_db/models/medical/finding/finding_type.py +3 -1
  168. endoreg_db/models/medical/hardware/__init__.py +1 -1
  169. endoreg_db/models/medical/hardware/endoscope.py +14 -2
  170. endoreg_db/models/medical/laboratory/__init__.py +1 -1
  171. endoreg_db/models/medical/laboratory/lab_value.py +139 -39
  172. endoreg_db/models/medical/medication/__init__.py +7 -3
  173. endoreg_db/models/medical/medication/medication.py +3 -1
  174. endoreg_db/models/medical/medication/medication_indication.py +3 -1
  175. endoreg_db/models/medical/medication/medication_indication_type.py +11 -3
  176. endoreg_db/models/medical/medication/medication_intake_time.py +3 -1
  177. endoreg_db/models/medical/medication/medication_schedule.py +3 -1
  178. endoreg_db/models/medical/patient/__init__.py +2 -10
  179. endoreg_db/models/medical/patient/medication_examples.py +3 -14
  180. endoreg_db/models/medical/patient/patient_disease.py +17 -5
  181. endoreg_db/models/medical/patient/patient_event.py +12 -4
  182. endoreg_db/models/medical/patient/patient_examination.py +52 -15
  183. endoreg_db/models/medical/patient/patient_examination_indication.py +15 -4
  184. endoreg_db/models/medical/patient/patient_finding.py +105 -29
  185. endoreg_db/models/medical/patient/patient_finding_classification.py +41 -12
  186. endoreg_db/models/medical/patient/patient_finding_intervention.py +11 -3
  187. endoreg_db/models/medical/patient/patient_lab_sample.py +6 -2
  188. endoreg_db/models/medical/patient/patient_lab_value.py +42 -10
  189. endoreg_db/models/medical/patient/patient_medication.py +25 -7
  190. endoreg_db/models/medical/patient/patient_medication_schedule.py +34 -10
  191. endoreg_db/models/metadata/model_meta.py +40 -12
  192. endoreg_db/models/metadata/model_meta_logic.py +51 -16
  193. endoreg_db/models/metadata/sensitive_meta.py +65 -28
  194. endoreg_db/models/metadata/sensitive_meta_logic.py +28 -26
  195. endoreg_db/models/metadata/video_meta.py +146 -39
  196. endoreg_db/models/metadata/video_prediction_logic.py +70 -21
  197. endoreg_db/models/metadata/video_prediction_meta.py +80 -27
  198. endoreg_db/models/operation_log.py +63 -0
  199. endoreg_db/models/other/__init__.py +10 -10
  200. endoreg_db/models/other/distribution/__init__.py +9 -7
  201. endoreg_db/models/other/distribution/base_value_distribution.py +3 -1
  202. endoreg_db/models/other/distribution/date_value_distribution.py +19 -5
  203. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +3 -1
  204. endoreg_db/models/other/distribution/numeric_value_distribution.py +34 -9
  205. endoreg_db/models/other/emission/__init__.py +1 -1
  206. endoreg_db/models/other/emission/emission_factor.py +9 -3
  207. endoreg_db/models/other/information_source.py +15 -5
  208. endoreg_db/models/other/material.py +3 -1
  209. endoreg_db/models/other/transport_route.py +3 -1
  210. endoreg_db/models/other/unit.py +6 -2
  211. endoreg_db/models/report/report.py +0 -1
  212. endoreg_db/models/requirement/requirement.py +84 -27
  213. endoreg_db/models/requirement/requirement_error.py +5 -6
  214. endoreg_db/models/requirement/requirement_evaluation/__init__.py +1 -1
  215. endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +8 -8
  216. endoreg_db/models/requirement/requirement_evaluation/get_values.py +3 -3
  217. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +24 -8
  218. endoreg_db/models/requirement/requirement_operator.py +28 -8
  219. endoreg_db/models/requirement/requirement_set.py +34 -11
  220. endoreg_db/models/state/__init__.py +1 -0
  221. endoreg_db/models/state/audit_ledger.py +9 -2
  222. endoreg_db/models/{media → state}/processing_history/__init__.py +1 -3
  223. endoreg_db/models/state/processing_history/processing_history.py +136 -0
  224. endoreg_db/models/state/raw_pdf.py +0 -1
  225. endoreg_db/models/state/video.py +2 -3
  226. endoreg_db/models/utils.py +4 -2
  227. endoreg_db/queries/__init__.py +2 -6
  228. endoreg_db/queries/annotations/__init__.py +1 -3
  229. endoreg_db/queries/annotations/legacy.py +37 -26
  230. endoreg_db/root_urls.py +3 -4
  231. endoreg_db/schemas/examination_evaluation.py +3 -0
  232. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +249 -163
  233. endoreg_db/serializers/__init__.py +2 -8
  234. endoreg_db/serializers/administration/__init__.py +1 -2
  235. endoreg_db/serializers/administration/ai/__init__.py +0 -1
  236. endoreg_db/serializers/administration/ai/active_model.py +3 -1
  237. endoreg_db/serializers/administration/ai/ai_model.py +5 -3
  238. endoreg_db/serializers/administration/ai/model_type.py +3 -1
  239. endoreg_db/serializers/administration/center.py +7 -2
  240. endoreg_db/serializers/administration/gender.py +4 -2
  241. endoreg_db/serializers/anonymization.py +13 -13
  242. endoreg_db/serializers/evaluation/examination_evaluation.py +0 -1
  243. endoreg_db/serializers/examination/__init__.py +1 -1
  244. endoreg_db/serializers/examination/base.py +12 -13
  245. endoreg_db/serializers/examination/dropdown.py +6 -7
  246. endoreg_db/serializers/examination_serializer.py +3 -6
  247. endoreg_db/serializers/finding/__init__.py +1 -1
  248. endoreg_db/serializers/finding/finding.py +14 -7
  249. endoreg_db/serializers/finding_classification/__init__.py +3 -3
  250. endoreg_db/serializers/finding_classification/choice.py +3 -3
  251. endoreg_db/serializers/finding_classification/classification.py +2 -4
  252. endoreg_db/serializers/label_video_segment/__init__.py +5 -3
  253. endoreg_db/serializers/{label → label_video_segment}/image_classification_annotation.py +5 -5
  254. endoreg_db/serializers/label_video_segment/label/__init__.py +6 -0
  255. endoreg_db/serializers/{label → label_video_segment/label}/label.py +1 -1
  256. endoreg_db/serializers/label_video_segment/label_video_segment.py +338 -228
  257. endoreg_db/serializers/meta/__init__.py +1 -2
  258. endoreg_db/serializers/meta/sensitive_meta_detail.py +28 -13
  259. endoreg_db/serializers/meta/sensitive_meta_update.py +51 -46
  260. endoreg_db/serializers/meta/sensitive_meta_verification.py +19 -16
  261. endoreg_db/serializers/misc/__init__.py +2 -2
  262. endoreg_db/serializers/misc/file_overview.py +11 -7
  263. endoreg_db/serializers/misc/stats.py +10 -8
  264. endoreg_db/serializers/misc/translatable_field_mix_in.py +6 -6
  265. endoreg_db/serializers/misc/upload_job.py +32 -29
  266. endoreg_db/serializers/patient/__init__.py +2 -1
  267. endoreg_db/serializers/patient/patient.py +32 -15
  268. endoreg_db/serializers/patient/patient_dropdown.py +11 -3
  269. endoreg_db/serializers/patient_examination/__init__.py +1 -1
  270. endoreg_db/serializers/patient_examination/patient_examination.py +67 -40
  271. endoreg_db/serializers/patient_finding/__init__.py +1 -1
  272. endoreg_db/serializers/patient_finding/patient_finding.py +2 -1
  273. endoreg_db/serializers/patient_finding/patient_finding_classification.py +17 -9
  274. endoreg_db/serializers/patient_finding/patient_finding_detail.py +26 -17
  275. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +7 -5
  276. endoreg_db/serializers/patient_finding/patient_finding_list.py +10 -11
  277. endoreg_db/serializers/patient_finding/patient_finding_write.py +36 -27
  278. endoreg_db/serializers/pdf/__init__.py +1 -3
  279. endoreg_db/serializers/requirements/requirement_schema.py +1 -6
  280. endoreg_db/serializers/sensitive_meta_serializer.py +100 -81
  281. endoreg_db/serializers/video/__init__.py +2 -2
  282. endoreg_db/serializers/video/{segmentation.py → video_file.py} +66 -47
  283. endoreg_db/serializers/video/video_file_brief.py +6 -2
  284. endoreg_db/serializers/video/video_file_detail.py +36 -23
  285. endoreg_db/serializers/video/video_file_list.py +4 -2
  286. endoreg_db/serializers/video/video_processing_history.py +54 -50
  287. endoreg_db/services/__init__.py +1 -1
  288. endoreg_db/services/anonymization.py +2 -2
  289. endoreg_db/services/examination_evaluation.py +40 -17
  290. endoreg_db/services/model_meta_from_hf.py +76 -0
  291. endoreg_db/services/polling_coordinator.py +101 -70
  292. endoreg_db/services/pseudonym_service.py +27 -22
  293. endoreg_db/services/report_import.py +6 -3
  294. endoreg_db/services/segment_sync.py +75 -59
  295. endoreg_db/services/video_import.py +6 -7
  296. endoreg_db/urls/__init__.py +2 -2
  297. endoreg_db/urls/ai.py +7 -25
  298. endoreg_db/urls/anonymization.py +61 -15
  299. endoreg_db/urls/auth.py +4 -4
  300. endoreg_db/urls/classification.py +4 -9
  301. endoreg_db/urls/examination.py +27 -18
  302. endoreg_db/urls/media.py +27 -34
  303. endoreg_db/urls/patient.py +11 -7
  304. endoreg_db/urls/requirements.py +3 -1
  305. endoreg_db/urls/root_urls.py +2 -3
  306. endoreg_db/urls/stats.py +24 -16
  307. endoreg_db/urls/upload.py +3 -11
  308. endoreg_db/utils/__init__.py +14 -15
  309. endoreg_db/utils/ai/__init__.py +1 -1
  310. endoreg_db/utils/ai/data_loader_for_model_input.py +262 -0
  311. endoreg_db/utils/ai/data_loader_for_model_training.py +262 -0
  312. endoreg_db/utils/ai/get.py +2 -1
  313. endoreg_db/utils/ai/inference_dataset.py +14 -15
  314. endoreg_db/utils/ai/model_training/config.py +117 -0
  315. endoreg_db/utils/ai/model_training/dataset.py +74 -0
  316. endoreg_db/utils/ai/model_training/losses.py +68 -0
  317. endoreg_db/utils/ai/model_training/metrics.py +78 -0
  318. endoreg_db/utils/ai/model_training/model_backbones.py +155 -0
  319. endoreg_db/utils/ai/model_training/model_gastronet_resnet.py +118 -0
  320. endoreg_db/utils/ai/model_training/trainer_gastronet_multilabel.py +771 -0
  321. endoreg_db/utils/ai/multilabel_classification_net.py +21 -6
  322. endoreg_db/utils/ai/predict.py +4 -4
  323. endoreg_db/utils/ai/preprocess.py +19 -11
  324. endoreg_db/utils/calc_duration_seconds.py +4 -4
  325. endoreg_db/utils/case_generator/lab_sample_factory.py +3 -4
  326. endoreg_db/utils/check_video_files.py +74 -47
  327. endoreg_db/utils/cropping.py +10 -9
  328. endoreg_db/utils/dataloader.py +11 -3
  329. endoreg_db/utils/dates.py +3 -4
  330. endoreg_db/utils/defaults/set_default_center.py +7 -6
  331. endoreg_db/utils/env.py +6 -2
  332. endoreg_db/utils/extract_specific_frames.py +24 -9
  333. endoreg_db/utils/file_operations.py +30 -18
  334. endoreg_db/utils/fix_video_path_direct.py +57 -41
  335. endoreg_db/utils/frame_anonymization_utils.py +157 -157
  336. endoreg_db/utils/hashs.py +3 -18
  337. endoreg_db/utils/links/requirement_link.py +96 -52
  338. endoreg_db/utils/ocr.py +30 -25
  339. endoreg_db/utils/operation_log.py +61 -0
  340. endoreg_db/utils/parse_and_generate_yaml.py +12 -13
  341. endoreg_db/utils/paths.py +6 -6
  342. endoreg_db/utils/permissions.py +40 -24
  343. endoreg_db/utils/pipelines/process_video_dir.py +50 -26
  344. endoreg_db/utils/product/sum_emissions.py +5 -3
  345. endoreg_db/utils/product/sum_weights.py +4 -2
  346. endoreg_db/utils/pydantic_models/__init__.py +3 -4
  347. endoreg_db/utils/requirement_operator_logic/_old/lab_value_operators.py +207 -107
  348. endoreg_db/utils/requirement_operator_logic/_old/model_evaluators.py +252 -65
  349. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +27 -10
  350. endoreg_db/utils/setup_config.py +21 -5
  351. endoreg_db/utils/storage.py +3 -1
  352. endoreg_db/utils/translation.py +19 -15
  353. endoreg_db/utils/uuid.py +1 -0
  354. endoreg_db/utils/validate_endo_roi.py +12 -4
  355. endoreg_db/utils/validate_subcategory_dict.py +26 -24
  356. endoreg_db/utils/validate_video_detailed.py +207 -149
  357. endoreg_db/utils/video/__init__.py +7 -3
  358. endoreg_db/utils/video/extract_frames.py +30 -18
  359. endoreg_db/utils/video/ffmpeg_wrapper.py +217 -52
  360. endoreg_db/utils/video/names.py +11 -6
  361. endoreg_db/utils/video/streaming_processor.py +175 -101
  362. endoreg_db/utils/video/video_splitter.py +30 -19
  363. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +59 -50
  364. endoreg_db/views/__init__.py +0 -20
  365. endoreg_db/views/anonymization/__init__.py +6 -2
  366. endoreg_db/views/anonymization/media_management.py +2 -6
  367. endoreg_db/views/anonymization/overview.py +34 -1
  368. endoreg_db/views/anonymization/validate.py +79 -18
  369. endoreg_db/views/auth/__init__.py +1 -1
  370. endoreg_db/views/auth/keycloak.py +16 -14
  371. endoreg_db/views/examination/__init__.py +12 -15
  372. endoreg_db/views/examination/examination.py +5 -5
  373. endoreg_db/views/examination/examination_manifest_cache.py +5 -5
  374. endoreg_db/views/examination/get_finding_classification_choices.py +8 -5
  375. endoreg_db/views/examination/get_finding_classifications.py +9 -7
  376. endoreg_db/views/examination/get_findings.py +8 -10
  377. endoreg_db/views/examination/get_instruments.py +3 -2
  378. endoreg_db/views/examination/get_interventions.py +1 -1
  379. endoreg_db/views/finding/__init__.py +2 -2
  380. endoreg_db/views/finding/finding.py +58 -54
  381. endoreg_db/views/finding/get_classifications.py +1 -1
  382. endoreg_db/views/finding/get_interventions.py +1 -1
  383. endoreg_db/views/finding_classification/__init__.py +5 -5
  384. endoreg_db/views/finding_classification/finding_classification.py +5 -6
  385. endoreg_db/views/finding_classification/get_classification_choices.py +3 -4
  386. endoreg_db/views/media/__init__.py +13 -13
  387. endoreg_db/views/media/pdf_media.py +9 -9
  388. endoreg_db/views/media/sensitive_metadata.py +10 -7
  389. endoreg_db/views/media/video_media.py +4 -4
  390. endoreg_db/views/meta/__init__.py +1 -1
  391. endoreg_db/views/meta/sensitive_meta_list.py +20 -22
  392. endoreg_db/views/meta/sensitive_meta_verification.py +14 -11
  393. endoreg_db/views/misc/__init__.py +6 -34
  394. endoreg_db/views/misc/center.py +2 -1
  395. endoreg_db/views/misc/csrf.py +2 -1
  396. endoreg_db/views/misc/gender.py +2 -1
  397. endoreg_db/views/misc/stats.py +141 -106
  398. endoreg_db/views/patient/__init__.py +1 -3
  399. endoreg_db/views/patient/patient.py +141 -99
  400. endoreg_db/views/patient_examination/__init__.py +5 -5
  401. endoreg_db/views/patient_examination/patient_examination.py +43 -42
  402. endoreg_db/views/patient_examination/patient_examination_create.py +10 -15
  403. endoreg_db/views/patient_examination/patient_examination_detail.py +12 -15
  404. endoreg_db/views/patient_examination/patient_examination_list.py +21 -17
  405. endoreg_db/views/patient_examination/video.py +114 -80
  406. endoreg_db/views/patient_finding/__init__.py +1 -1
  407. endoreg_db/views/patient_finding/patient_finding.py +17 -10
  408. endoreg_db/views/patient_finding/patient_finding_optimized.py +127 -95
  409. endoreg_db/views/patient_finding_classification/__init__.py +1 -1
  410. endoreg_db/views/patient_finding_classification/pfc_create.py +35 -27
  411. endoreg_db/views/report/reimport.py +1 -1
  412. endoreg_db/views/report/report_stream.py +5 -8
  413. endoreg_db/views/requirement/__init__.py +2 -1
  414. endoreg_db/views/requirement/evaluate.py +7 -9
  415. endoreg_db/views/requirement/lookup.py +2 -3
  416. endoreg_db/views/requirement/lookup_store.py +0 -1
  417. endoreg_db/views/requirement/requirement_utils.py +2 -4
  418. endoreg_db/views/stats/__init__.py +4 -4
  419. endoreg_db/views/stats/stats_views.py +152 -115
  420. endoreg_db/views/video/__init__.py +18 -27
  421. endoreg_db/views/{ai → video/ai}/__init__.py +2 -2
  422. endoreg_db/views/{ai → video/ai}/label.py +20 -16
  423. endoreg_db/views/video/correction.py +5 -6
  424. endoreg_db/views/video/reimport.py +134 -99
  425. endoreg_db/views/video/segments_crud.py +134 -44
  426. endoreg_db/views/video/video_apply_mask.py +13 -12
  427. endoreg_db/views/video/video_correction.py +2 -1
  428. endoreg_db/views/video/video_download_processed.py +15 -15
  429. endoreg_db/views/video/video_meta_stats.py +7 -6
  430. endoreg_db/views/video/video_processing_history.py +3 -2
  431. endoreg_db/views/video/video_remove_frames.py +13 -12
  432. endoreg_db/views/video/video_stream.py +110 -82
  433. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/METADATA +9 -3
  434. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/RECORD +436 -433
  435. endoreg_db/import_files/processing/video_processing/video_cleanup_on_error.py +0 -119
  436. endoreg_db/management/commands/import_fallback_video.py +0 -203
  437. endoreg_db/management/commands/import_video.py +0 -422
  438. endoreg_db/management/commands/import_video_with_classification.py +0 -367
  439. endoreg_db/models/media/processing_history/processing_history.py +0 -96
  440. endoreg_db/serializers/label/__init__.py +0 -7
  441. endoreg_db/serializers/label_video_segment/_lvs_create.py +0 -149
  442. endoreg_db/serializers/label_video_segment/_lvs_update.py +0 -138
  443. endoreg_db/serializers/label_video_segment/_lvs_validate.py +0 -149
  444. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +0 -99
  445. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +0 -163
  446. endoreg_db/services/__old/pdf_import.py +0 -1487
  447. endoreg_db/services/__old/video_import.py +0 -1306
  448. endoreg_db/tasks/upload_tasks.py +0 -216
  449. endoreg_db/tasks/video_ingest.py +0 -161
  450. endoreg_db/tasks/video_processing_tasks.py +0 -327
  451. endoreg_db/views/misc/translation.py +0 -182
  452. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/WHEEL +0 -0
  453. {endoreg_db-0.8.8.9.dist-info → endoreg_db-0.8.9.10.dist-info}/licenses/LICENSE +0 -0
endoreg_db/urls/media.py CHANGED
@@ -1,36 +1,35 @@
1
1
  from django.urls import path
2
2
 
3
3
  from endoreg_db.views import VideoStreamView
4
- from endoreg_db.views.ai import label_list
5
4
  from endoreg_db.views.media import (
6
- PdfMediaView, # Alias to avoid conflict with legacy pdf.PDFMediaView
7
- VideoMediaView,
8
- get_sensitive_metadata_pk,
9
- pdf_sensitive_metadata,
10
- pdf_sensitive_metadata_list,
11
- pdf_sensitive_metadata_verify,
12
- sensitive_metadata_list,
13
- video_sensitive_metadata,
14
- video_sensitive_metadata_verify,
5
+ PdfMediaView, # Alias to avoid conflict with legacy pdf.reportMediaView
6
+ VideoMediaView,
7
+ get_sensitive_metadata_pk,
8
+ pdf_sensitive_metadata,
9
+ pdf_sensitive_metadata_list,
10
+ pdf_sensitive_metadata_verify,
11
+ sensitive_metadata_list,
12
+ video_sensitive_metadata,
13
+ video_sensitive_metadata_verify,
15
14
  )
16
15
  from endoreg_db.views.report.reimport import ReportReimportView
17
16
  from endoreg_db.views.report.report_stream import ReportStreamView
18
17
  from endoreg_db.views.video import (
19
- VideoReimportView,
20
- video_segment_detail,
21
- video_segment_validate,
22
- video_segments_by_video,
23
- video_segments_collection,
24
- video_segments_stats,
25
- video_segments_validate_bulk,
26
- video_segments_validation_status,
18
+ VideoReimportView,
19
+ video_segment_detail,
20
+ video_segment_validate,
21
+ video_segments_by_video,
22
+ video_segments_collection,
23
+ video_segments_stats,
24
+ video_segments_validate_bulk,
25
+ video_segments_validation_status,
27
26
  )
27
+ from endoreg_db.views.video.ai import label_list
28
28
  from endoreg_db.views.video.correction import (
29
- VideoApplyMaskView,
30
- VideoCorrectionView,
31
- VideoMetadataStatsView,
32
- VideoProcessingHistoryView,
33
- VideoRemoveFramesView,
29
+ VideoApplyMaskView,
30
+ VideoCorrectionView,
31
+ VideoMetadataStatsView,
32
+ VideoRemoveFramesView,
34
33
  )
35
34
 
36
35
  # ---------------------------------------------------------------------------------------
@@ -95,14 +94,6 @@ urlpatterns = [
95
94
  VideoMetadataStatsView.as_view(),
96
95
  name="video-metadata",
97
96
  ),
98
- # Video Processing History API
99
- # GET /api/media/videos/<int:pk>/processing-history/
100
- # Returns history of all processing operations (masking, frame removal, analysis)
101
- path(
102
- "media/videos/<int:pk>/processing-history/",
103
- VideoProcessingHistoryView.as_view(),
104
- name="video-processing-history",
105
- ),
106
97
  # Video Analysis API
107
98
  # POST /api/media/videos/<int:pk>/analyze/
108
99
  # Analyzes video for sensitive frames using MiniCPM-o 2.6 or OCR+LLM
@@ -234,7 +225,7 @@ urlpatterns = [
234
225
  ),
235
226
  # List Endpoints (Collection-Level)
236
227
  # GET /api/media/sensitive-metadata/
237
- # List all sensitive metadata (combined PDFs and Videos)
228
+ # List all sensitive metadata (combined reports and Videos)
238
229
  # Supports filtering: ?content_type=pdf|video&verified=true&search=name
239
230
  path(
240
231
  "media/sensitive-metadata/",
@@ -242,7 +233,7 @@ urlpatterns = [
242
233
  name="sensitive-metadata-list",
243
234
  ),
244
235
  # GET /api/media/pdfs/sensitive-metadata/
245
- # List sensitive metadata for PDFs only
236
+ # List sensitive metadata for reports only
246
237
  # Replaces legacy /api/pdf/sensitivemeta/list/
247
238
  path(
248
239
  "media/pdfs/sensitive-metadata/",
@@ -259,7 +250,9 @@ urlpatterns = [
259
250
  # POST /api/media/pdfs/<int:pk>/reimport/
260
251
  # Re-imports a report file to regenerate metadata when OCR failed or data is incomplete
261
252
  path(
262
- "media/pdfs/<int:pk>/reimport/", ReportReimportView.as_view(), name="report-reimport"
253
+ "media/pdfs/<int:pk>/reimport/",
254
+ ReportReimportView.as_view(),
255
+ name="report-reimport",
263
256
  ),
264
257
  ]
265
258
  # ---------------------------------------------------------------------------------------
@@ -2,18 +2,22 @@ from endoreg_db.views import (
2
2
  GenderViewSet,
3
3
  CenterViewSet,
4
4
  PatientViewSet,
5
- PatientFindingViewSet
5
+ PatientFindingViewSet,
6
6
  )
7
7
  from rest_framework.routers import DefaultRouter
8
8
  from django.urls import path, include
9
9
 
10
10
  router = DefaultRouter()
11
- router.register(r'patients', PatientViewSet)
12
- router.register(r'centers', CenterViewSet)
13
- router.register(r'genders', GenderViewSet)
14
- router.register(r'patient-findings', PatientFindingViewSet)
11
+ router.register(r"patients", PatientViewSet)
12
+ router.register(r"centers", CenterViewSet)
13
+ router.register(r"genders", GenderViewSet)
14
+ router.register(r"patient-findings", PatientFindingViewSet)
15
15
 
16
16
  urlpatterns = [
17
- path('', include(router.urls)),
18
- path('check_pe_exist/<int:pk>/', PatientViewSet.as_view({'get': 'check_pe_exist'}), name='check_pe_exist'),
17
+ path("", include(router.urls)),
18
+ path(
19
+ "check_pe_exist/<int:pk>/",
20
+ PatientViewSet.as_view({"get": "check_pe_exist"}),
21
+ name="check_pe_exist",
22
+ ),
19
23
  ]
@@ -9,5 +9,7 @@ router.register(r"lookup", LookupViewSet, basename="lookup/")
9
9
 
10
10
  urlpatterns = [
11
11
  path("", include(router.urls)),
12
- path("evaluate-requirements/", evaluate_requirements, name="evaluate-requirements/"),
12
+ path(
13
+ "evaluate-requirements/", evaluate_requirements, name="evaluate-requirements/"
14
+ ),
13
15
  ]
@@ -5,20 +5,19 @@ from django.http import HttpResponse
5
5
  from django.conf import settings
6
6
  from django.conf.urls.static import static
7
7
 
8
+
8
9
  def public_home(_request):
9
10
  return HttpResponse("Public home – no login required.")
10
11
 
12
+
11
13
  urlpatterns = [
12
14
  # Public landing page
13
15
  path("", public_home, name="public_home"),
14
-
15
16
  # Django admin (optional)
16
17
  path("admin/", admin.site.urls),
17
-
18
18
  # Mount ALL API routes under /api/
19
19
  # This pulls the urlpatterns exported by endoreg_db/urls/__init__.py
20
20
  path("api/", include("endoreg_db.urls")),
21
-
22
21
  # Keycloak OIDC (mozilla-django-oidc provides /oidc/authenticate/ and /oidc/callback/)
23
22
  path("oidc/", include("mozilla_django_oidc.urls")),
24
23
  ]
endoreg_db/urls/stats.py CHANGED
@@ -13,34 +13,42 @@ url_patterns = [
13
13
  #
14
14
  # Diese Endpunkte stellen Dashboard-Statistiken bereit für das Frontend
15
15
  # ---------------------------------------------------------------------------------------
16
-
17
16
  # Examination Statistics API
18
17
  # GET /api/examinations/stats/
19
18
  # Liefert Statistiken über Examinations und PatientExaminations
20
- path('examinations/stats/', ExaminationStatsView.as_view(), name='examination_stats'),
21
-
22
- # Video Segment Statistics API
19
+ path(
20
+ "examinations/stats/", ExaminationStatsView.as_view(), name="examination_stats"
21
+ ),
22
+ # Video Segment Statistics API
23
23
  # GET /api/video-segment/stats/ (Note: singular 'segment' to match frontend)
24
24
  # Liefert Statistiken über Video-Segmente und Label-Verteilung
25
- path('video-segment/stats/', VideoSegmentStatsView.as_view(), name='video_segment_stats'),
26
-
25
+ path(
26
+ "video-segment/stats/",
27
+ VideoSegmentStatsView.as_view(),
28
+ name="video_segment_stats",
29
+ ),
27
30
  # Alternative Video Segments Statistics API (plural version for compatibility)
28
31
  # GET /api/video-segments/stats/
29
- path('video-segments/stats/', VideoSegmentStatsView.as_view(), name='video_segments_stats'),
30
-
32
+ path(
33
+ "video-segments/stats/",
34
+ VideoSegmentStatsView.as_view(),
35
+ name="video_segments_stats",
36
+ ),
31
37
  # Sensitive Meta Statistics API
32
38
  # GET /api/video/sensitivemeta/stats/
33
39
  # Liefert Statistiken über SensitiveMeta-Einträge und Verifikationsstatus
34
- path('video/sensitivemeta/stats/', SensitiveMetaStatsView.as_view(), name='sensitive_meta_stats'),
35
-
40
+ path(
41
+ "video/sensitivemeta/stats/",
42
+ SensitiveMetaStatsView.as_view(),
43
+ name="sensitive_meta_stats",
44
+ ),
36
45
  # General Dashboard Statistics API
37
46
  # GET /api/stats/
38
47
  # Liefert allgemeine Übersichtsstatistiken für das Dashboard
39
- path('stats/', GeneralStatsView.as_view(), name='general_stats'),
40
-
48
+ path("stats/", GeneralStatsView.as_view(), name="general_stats"),
41
49
  path(
42
- 'video-segments/stats/',
43
- VideoSegmentStatsView.as_view(),
44
- name='video_segments_stats'
50
+ "video-segments/stats/",
51
+ VideoSegmentStatsView.as_view(),
52
+ name="video_segments_stats",
45
53
  ),
46
- ]
54
+ ]
endoreg_db/urls/upload.py CHANGED
@@ -7,14 +7,6 @@ from endoreg_db.views import (
7
7
 
8
8
  urlpatterns = [
9
9
  # Upload endpoints
10
- path(
11
- 'upload/',
12
- UploadFileView.as_view(),
13
- name='video_upload'
14
- ),
15
- path(
16
- 'upload/<uuid:id>/status',
17
- UploadStatusView.as_view(),
18
- name='upload_status'
19
- ),
20
- ]
10
+ path("upload/", UploadFileView.as_view(), name="video_upload"),
11
+ path("upload/<uuid:id>/status", UploadStatusView.as_view(), name="upload_status"),
12
+ ]
@@ -3,7 +3,12 @@
3
3
  # --- Imports from submodules ---
4
4
 
5
5
  # dataloader
6
- from endoreg_db.utils.video.ffmpeg_wrapper import assemble_video_from_frames, get_stream_info, transcode_video, transcode_videofile_if_required
6
+ from endoreg_db.utils.video.ffmpeg_wrapper import (
7
+ assemble_video_from_frames,
8
+ get_stream_info,
9
+ transcode_video,
10
+ transcode_videofile_if_required,
11
+ )
7
12
 
8
13
  from .dataloader import load_model_data_from_yaml
9
14
 
@@ -14,7 +19,10 @@ from .dates import ensure_aware_datetime, random_day_by_month_year, random_day_b
14
19
  from .env import DEBUG, DJANGO_SETTINGS_MODULE, get_env_var
15
20
 
16
21
  # file_operations
17
- from .file_operations import copy_with_progress, get_uuid_filename, rename_file_uuid
22
+ from .file_operations import (
23
+ copy_with_progress,
24
+ get_content_hash_filename,
25
+ )
18
26
 
19
27
  # hashs
20
28
  from .hashs import (
@@ -41,23 +49,15 @@ from .paths import data_paths
41
49
 
42
50
  # pydantic_models
43
51
  from .pydantic_models import DbConfig
44
- from .storage import (
45
- delete_field_file,
46
- ensure_local_file,
47
- save_local_file,
48
- )
49
- from .storage import (
50
- file_exists as storage_file_exists,
51
- )
52
+ from .storage import delete_field_file, ensure_local_file, save_local_file
53
+ from .storage import file_exists as storage_file_exists
52
54
 
53
55
  # validate_endo_roi
54
56
  from .validate_endo_roi import validate_endo_roi
55
57
  from .video import split_video
56
58
 
57
59
  # ffmpeg_wrapper
58
- from .video.ffmpeg_wrapper import (
59
- extract_frames,
60
- )
60
+ from .video.ffmpeg_wrapper import extract_frames
61
61
 
62
62
  # --- Exports ---
63
63
 
@@ -77,13 +77,12 @@ __all__ = [
77
77
  "get_hash_string",
78
78
  "get_patient_examination_hash",
79
79
  "get_pdf_hash",
80
- "get_uuid_filename",
80
+ "get_content_hash_filename",
81
81
  "get_video_hash",
82
82
  "guess_name_gender",
83
83
  "load_model_data_from_yaml",
84
84
  "random_day_by_month_year",
85
85
  "random_day_by_year",
86
- "rename_file_uuid",
87
86
  "validate_endo_roi",
88
87
  "assemble_video_from_frames", # Updated name
89
88
  "get_stream_info",
@@ -6,4 +6,4 @@ __all__ = [
6
6
  "InferenceDataset",
7
7
  "MultiLabelClassificationNet",
8
8
  "Classifier",
9
- ]
9
+ ]
@@ -0,0 +1,262 @@
1
+ # endoreg_db/utils/ai/data_loader_for_model_training.py
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import defaultdict
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, TypedDict
8
+
9
+ from django.db import models
10
+
11
+ from endoreg_db.models import (
12
+ AIDataSet,
13
+ Frame,
14
+ ImageClassificationAnnotation,
15
+ Label,
16
+ LabelSet,
17
+ )
18
+
19
+
20
+ class ImageMultilabelDataset(TypedDict):
21
+ """
22
+ In-memory representation of an image multi-label training dataset.
23
+
24
+ All lists are aligned by index:
25
+
26
+ image_paths[i] -> path to image file for sample i
27
+ label_vectors[i] -> list[int|None] of length == len(labels)
28
+ label_masks[i] -> list[int] of length == len(labels)
29
+
30
+ Where:
31
+ - label_vectors[i][j] is:
32
+ 1 -> positive annotation (value=True)
33
+ 0 -> negative annotation (value=False)
34
+ None -> UNKNOWN (no annotation for that (frame, label))
35
+
36
+ - label_masks[i][j] is:
37
+ 1 -> this entry participates in the loss (0 or 1 is known)
38
+ 0 -> IGNORE in the loss (value was None)
39
+ """
40
+
41
+ # type description of the returned dict.
42
+
43
+ image_paths: List[str]
44
+ label_vectors: List[List[Optional[int]]]
45
+ label_masks: List[List[int]]
46
+ labels: List[Label]
47
+ labelset: LabelSet
48
+
49
+ # New: keep track of which DB rows were used, and their legacy exam ids
50
+ frame_ids: List[int] # Frame.pk for each sample
51
+ old_examination_ids: List[Optional[int]] # may be None if not set
52
+
53
+
54
+ def _infer_labelset_from_annotations(
55
+ annotations_qs: models.QuerySet[ImageClassificationAnnotation],
56
+ ) -> LabelSet:
57
+ """
58
+ Try to infer a unique LabelSet from the labels used in the annotations.
59
+
60
+ Strategy:
61
+ 1. Collect all distinct label_ids from the annotations.
62
+ 2. Fetch all Label objects + their label_sets.
63
+ 3. Compute the intersection of all label_sets across all labels.
64
+ 4. If there is exactly ONE common LabelSet, return it.
65
+ Otherwise, raise NotImplementedError for now.
66
+ """
67
+ label_ids = list(annotations_qs.values_list("label_id", flat=True).distinct())
68
+ if not label_ids:
69
+ raise ValueError("Cannot infer LabelSet: annotations queryset has no labels.")
70
+
71
+ labels_qs = Label.objects.filter(id__in=label_ids).prefetch_related("label_sets")
72
+ labelsets_for_each_label = []
73
+
74
+ for lbl in labels_qs:
75
+ # lbl.label_sets is the reverse of LabelSet.labels M2M
76
+ ls_ids = list(lbl.label_sets.values_list("id", flat=True))
77
+ if not ls_ids:
78
+ # This label is not part of any LabelSet -> ambiguous
79
+ raise NotImplementedError(
80
+ f"Label id={lbl.id}, name='{lbl.name}' is not part of any LabelSet. "
81
+ "Explicit LabelSet selection is required."
82
+ )
83
+ labelsets_for_each_label.append(set(ls_ids))
84
+
85
+ # Intersection of all labelset id sets
86
+ common_ids = set.intersection(*labelsets_for_each_label)
87
+ if not common_ids:
88
+ raise NotImplementedError(
89
+ "No common LabelSet across all labels in this AIDataSet. "
90
+ "Please specify a LabelSet explicitly."
91
+ )
92
+ if len(common_ids) > 1:
93
+ raise NotImplementedError(
94
+ "More than one common LabelSet found for the labels in this AIDataSet. "
95
+ "Please specify a LabelSet explicitly to disambiguate."
96
+ )
97
+
98
+ ls_id = next(iter(common_ids))
99
+ return LabelSet.objects.get(id=ls_id)
100
+
101
+
102
+ def build_image_multilabel_dataset_from_db(
103
+ dataset: AIDataSet,
104
+ labelset: Optional[LabelSet] = None,
105
+ ) -> ImageMultilabelDataset:
106
+ """
107
+ Build an in-memory multilabel dataset for an IMAGE-based AIDataSet.
108
+
109
+ Steps:
110
+ 1. Take all ImageClassificationAnnotation rows linked to this AIDataSet
111
+ (via dataset.image_annotations M2M).
112
+ 2. Determine the LabelSet (either explicitly given or inferred).
113
+ 3. For each used Frame, build:
114
+ - an image path
115
+ - a label vector (1, 0, or None for each label in LabelSet)
116
+ - a mask vector (1 where known, 0 where unknown)
117
+ 4. Return a dict that can be wrapped in a torch/tf Dataset.
118
+
119
+ NOTE:
120
+ - This function does NOT write anything to the DB.
121
+ - It only reads DB rows and returns Python structures.
122
+ """
123
+ if dataset.dataset_type != AIDataSet.DATASET_TYPE_IMAGE:
124
+ raise ValueError(
125
+ f"build_image_multilabel_dataset_from_db expected dataset_type='image', "
126
+ f"got '{dataset.dataset_type}' for AIDataSet id={dataset.id}."
127
+ )
128
+
129
+ # Get the annotation relation dynamically (for future video/text types)
130
+ annotations_qs = dataset.get_annotations_queryset().select_related("frame", "label")
131
+
132
+ if annotations_qs.count() == 0:
133
+ raise ValueError(
134
+ f"AIDataSet id={dataset.id} has no annotations attached. "
135
+ "Make sure your import script populated image_annotations."
136
+ )
137
+
138
+ # Decide which LabelSet to use
139
+ if labelset is None:
140
+ labelset = _infer_labelset_from_annotations(annotations_qs)
141
+
142
+ # Fixed label order (= fixed column order for the label vectors)
143
+ labels_in_order: List[Label] = labelset.get_labels_in_order()
144
+ if not labels_in_order:
145
+ raise ValueError(
146
+ f"LabelSet id={labelset.id}, name='{labelset.name}' has no labels."
147
+ )
148
+
149
+ num_labels = len(labels_in_order)
150
+ label_index: Dict[int, int] = {
151
+ lbl.id: idx for idx, lbl in enumerate(labels_in_order)
152
+ }
153
+
154
+ # Group annotations by frame
155
+ anns_by_frame: Dict[int, List[ImageClassificationAnnotation]] = defaultdict(list)
156
+ frames_order: List[int] = []
157
+
158
+ for ann in annotations_qs:
159
+ frame_id = ann.frame_id
160
+ if frame_id not in anns_by_frame:
161
+ frames_order.append(frame_id)
162
+ anns_by_frame[frame_id].append(ann)
163
+
164
+ # Build vectors
165
+ image_paths: List[str] = []
166
+ label_vectors: List[List[Optional[int]]] = []
167
+ label_masks: List[List[int]] = []
168
+
169
+ # New: id tracking for splitting / logging
170
+ frame_ids: List[int] = []
171
+ old_examination_ids: List[Optional[int]] = []
172
+
173
+ # Cache frames to avoid repeated DB hits
174
+ frame_obj_by_id: Dict[int, Frame] = {}
175
+
176
+ for frame_id in frames_order:
177
+ frame_annotations = anns_by_frame[frame_id]
178
+
179
+ # Resolve frame object (from first annotation of this frame)
180
+ frame = frame_obj_by_id.get(frame_id)
181
+ if frame is None:
182
+ frame = frame_annotations[0].frame
183
+ frame_obj_by_id[frame_id] = frame
184
+
185
+ # New: remember DB ids for this sample
186
+ frame_ids.append(frame_id)
187
+ old_examination_ids.append(getattr(frame, "old_examination_id", None))
188
+
189
+ # Start with unknown for all labels
190
+ vec: List[Optional[int]] = [None] * num_labels
191
+
192
+ # Fill with 1/0 where we have annotations
193
+ for ann in frame_annotations:
194
+ idx = label_index.get(ann.label_id)
195
+ if idx is None:
196
+ # Label not part of this LabelSet: ignore
197
+ continue
198
+ vec[idx] = 1 if ann.value else 0
199
+
200
+ # Build mask: 1 where vec is known, 0 where unknown
201
+ mask: List[int] = [0 if v is None else 1 for v in vec]
202
+
203
+ # Resolve absolute image path from the Frame model
204
+ file_path: Path = frame.file_path
205
+ image_paths.append(str(file_path))
206
+ label_vectors.append(vec)
207
+ label_masks.append(mask)
208
+
209
+ return ImageMultilabelDataset(
210
+ image_paths=image_paths,
211
+ label_vectors=label_vectors,
212
+ label_masks=label_masks,
213
+ labels=labels_in_order,
214
+ labelset=labelset,
215
+ frame_ids=frame_ids,
216
+ old_examination_ids=old_examination_ids,
217
+ )
218
+
219
+
220
+ def build_dataset_for_training(
221
+ dataset: AIDataSet,
222
+ labelset: Optional[LabelSet] = None,
223
+ ):
224
+ """
225
+ High-level entry point to build a training dataset from an AIDataSet row.
226
+
227
+ It inspects:
228
+ - dataset.dataset_type
229
+ - dataset.ai_model_type
230
+
231
+ and dispatches to the appropriate builder.
232
+
233
+ For now, we support:
234
+ - dataset_type = "image"
235
+ - ai_model_type = "image_multilabel_classification"
236
+
237
+ Later, you can extend this to:
238
+ - video segmentation
239
+ - text classification
240
+ etc.
241
+ """
242
+ # IMAGE MULTILABEL CASE
243
+ if (
244
+ dataset.dataset_type == AIDataSet.DATASET_TYPE_IMAGE
245
+ and dataset.ai_model_type == AIDataSet.AI_MODEL_TYPE_IMAGE_MULTILABEL
246
+ ):
247
+ return build_image_multilabel_dataset_from_db(dataset, labelset=labelset)
248
+
249
+ # FUTURE EXTENSIONS (example structure, not yet implemented):
250
+ # if dataset.dataset_type == AIDataSet.DATASET_TYPE_VIDEO and \
251
+ # dataset.ai_model_type == AIDataSet.AI_MODEL_TYPE_VIDEO_SEGMENTATION:
252
+ # return build_video_segmentation_dataset_from_db(dataset, labelset=labelset)
253
+ #
254
+ # if dataset.dataset_type == AIDataSet.DATASET_TYPE_TEXT and \
255
+ # dataset.ai_model_type == AIDataSet.AI_MODEL_TYPE_TEXT_CLASSIFICATION:
256
+ # return build_text_classification_dataset_from_db(dataset, labelset=labelset)
257
+
258
+ raise NotImplementedError(
259
+ f"No dataset builder implemented for "
260
+ f"dataset_type='{dataset.dataset_type}', "
261
+ f"ai_model_type='{dataset.ai_model_type}'."
262
+ )