endoreg-db 0.8.9.2__py3-none-any.whl → 0.8.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (450) hide show
  1. endoreg_db/admin.py +10 -5
  2. endoreg_db/apps.py +4 -7
  3. endoreg_db/authz/auth.py +1 -0
  4. endoreg_db/authz/backends.py +1 -1
  5. endoreg_db/authz/management/commands/list_routes.py +2 -0
  6. endoreg_db/authz/middleware.py +8 -7
  7. endoreg_db/authz/permissions.py +21 -10
  8. endoreg_db/authz/policy.py +14 -19
  9. endoreg_db/authz/views_auth.py +14 -10
  10. endoreg_db/codemods/rename_datetime_fields.py +8 -1
  11. endoreg_db/exceptions.py +5 -2
  12. endoreg_db/forms/__init__.py +0 -1
  13. endoreg_db/forms/examination_form.py +4 -3
  14. endoreg_db/forms/patient_finding_intervention_form.py +30 -8
  15. endoreg_db/forms/patient_form.py +9 -13
  16. endoreg_db/forms/questionnaires/__init__.py +1 -1
  17. endoreg_db/forms/settings/__init__.py +4 -1
  18. endoreg_db/forms/unit.py +2 -1
  19. endoreg_db/helpers/count_db.py +17 -14
  20. endoreg_db/helpers/default_objects.py +2 -1
  21. endoreg_db/helpers/download_segmentation_model.py +4 -3
  22. endoreg_db/helpers/interact.py +0 -5
  23. endoreg_db/helpers/test_video_helper.py +33 -25
  24. endoreg_db/import_files/__init__.py +1 -1
  25. endoreg_db/import_files/context/__init__.py +1 -1
  26. endoreg_db/import_files/context/default_sensitive_meta.py +11 -9
  27. endoreg_db/import_files/context/ensure_center.py +4 -4
  28. endoreg_db/import_files/context/file_lock.py +3 -3
  29. endoreg_db/import_files/context/import_context.py +11 -12
  30. endoreg_db/import_files/context/validate_directories.py +1 -0
  31. endoreg_db/import_files/file_storage/create_report_file.py +57 -34
  32. endoreg_db/import_files/file_storage/create_video_file.py +64 -35
  33. endoreg_db/import_files/file_storage/sensitive_meta_storage.py +5 -2
  34. endoreg_db/import_files/file_storage/state_management.py +89 -122
  35. endoreg_db/import_files/file_storage/storage.py +5 -1
  36. endoreg_db/import_files/processing/report_processing/report_anonymization.py +24 -19
  37. endoreg_db/import_files/processing/sensitive_meta_adapter.py +3 -3
  38. endoreg_db/import_files/processing/video_processing/video_anonymization.py +18 -18
  39. endoreg_db/import_files/pseudonymization/k_anonymity.py +8 -9
  40. endoreg_db/import_files/pseudonymization/k_pseudonymity.py +16 -5
  41. endoreg_db/import_files/report_import_service.py +36 -30
  42. endoreg_db/import_files/video_import_service.py +27 -23
  43. endoreg_db/logger_conf.py +56 -40
  44. endoreg_db/management/__init__.py +1 -1
  45. endoreg_db/management/commands/__init__.py +1 -1
  46. endoreg_db/management/commands/check_auth.py +45 -38
  47. endoreg_db/management/commands/create_model_meta_from_huggingface.py +53 -2
  48. endoreg_db/management/commands/create_multilabel_model_meta.py +54 -19
  49. endoreg_db/management/commands/fix_missing_patient_data.py +105 -71
  50. endoreg_db/management/commands/fix_video_paths.py +75 -54
  51. endoreg_db/management/commands/import_report.py +1 -3
  52. endoreg_db/management/commands/list_routes.py +2 -0
  53. endoreg_db/management/commands/load_ai_model_data.py +8 -2
  54. endoreg_db/management/commands/load_ai_model_label_data.py +0 -1
  55. endoreg_db/management/commands/load_center_data.py +3 -3
  56. endoreg_db/management/commands/load_distribution_data.py +35 -38
  57. endoreg_db/management/commands/load_endoscope_data.py +0 -3
  58. endoreg_db/management/commands/load_examination_data.py +20 -4
  59. endoreg_db/management/commands/load_finding_data.py +18 -3
  60. endoreg_db/management/commands/load_gender_data.py +17 -24
  61. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +95 -85
  62. endoreg_db/management/commands/load_information_source.py +0 -3
  63. endoreg_db/management/commands/load_lab_value_data.py +14 -3
  64. endoreg_db/management/commands/load_legacy_data.py +303 -0
  65. endoreg_db/management/commands/load_name_data.py +1 -2
  66. endoreg_db/management/commands/load_pdf_type_data.py +4 -8
  67. endoreg_db/management/commands/load_profession_data.py +0 -1
  68. endoreg_db/management/commands/load_report_reader_flag_data.py +0 -4
  69. endoreg_db/management/commands/load_requirement_data.py +6 -2
  70. endoreg_db/management/commands/load_unit_data.py +0 -4
  71. endoreg_db/management/commands/load_user_groups.py +5 -7
  72. endoreg_db/management/commands/model_input.py +169 -0
  73. endoreg_db/management/commands/register_ai_model.py +22 -16
  74. endoreg_db/management/commands/setup_endoreg_db.py +110 -32
  75. endoreg_db/management/commands/storage_management.py +14 -8
  76. endoreg_db/management/commands/summarize_db_content.py +154 -63
  77. endoreg_db/management/commands/train_image_multilabel_model.py +144 -0
  78. endoreg_db/management/commands/validate_video_files.py +82 -50
  79. endoreg_db/management/commands/video_validation.py +4 -6
  80. endoreg_db/migrations/0001_initial.py +112 -63
  81. endoreg_db/models/__init__.py +8 -0
  82. endoreg_db/models/administration/ai/active_model.py +5 -5
  83. endoreg_db/models/administration/ai/ai_model.py +41 -18
  84. endoreg_db/models/administration/ai/model_type.py +1 -0
  85. endoreg_db/models/administration/case/case.py +22 -22
  86. endoreg_db/models/administration/center/__init__.py +5 -5
  87. endoreg_db/models/administration/center/center.py +6 -2
  88. endoreg_db/models/administration/center/center_resource.py +18 -4
  89. endoreg_db/models/administration/center/center_shift.py +3 -1
  90. endoreg_db/models/administration/center/center_waste.py +6 -2
  91. endoreg_db/models/administration/person/__init__.py +1 -1
  92. endoreg_db/models/administration/person/employee/__init__.py +1 -1
  93. endoreg_db/models/administration/person/employee/employee_type.py +3 -1
  94. endoreg_db/models/administration/person/examiner/__init__.py +1 -1
  95. endoreg_db/models/administration/person/examiner/examiner.py +10 -2
  96. endoreg_db/models/administration/person/names/first_name.py +6 -4
  97. endoreg_db/models/administration/person/names/last_name.py +4 -3
  98. endoreg_db/models/administration/person/patient/__init__.py +1 -1
  99. endoreg_db/models/administration/person/patient/patient.py +0 -1
  100. endoreg_db/models/administration/person/patient/patient_external_id.py +0 -1
  101. endoreg_db/models/administration/person/person.py +1 -1
  102. endoreg_db/models/administration/product/__init__.py +7 -6
  103. endoreg_db/models/administration/product/product.py +6 -2
  104. endoreg_db/models/administration/product/product_group.py +9 -7
  105. endoreg_db/models/administration/product/product_material.py +9 -2
  106. endoreg_db/models/administration/product/reference_product.py +64 -15
  107. endoreg_db/models/administration/qualification/qualification.py +3 -1
  108. endoreg_db/models/administration/shift/shift.py +3 -1
  109. endoreg_db/models/administration/shift/shift_type.py +12 -4
  110. endoreg_db/models/aidataset/__init__.py +5 -0
  111. endoreg_db/models/aidataset/aidataset.py +193 -0
  112. endoreg_db/models/label/__init__.py +1 -1
  113. endoreg_db/models/label/label.py +10 -2
  114. endoreg_db/models/label/label_set.py +3 -1
  115. endoreg_db/models/label/label_video_segment/_create_from_video.py +6 -2
  116. endoreg_db/models/label/label_video_segment/label_video_segment.py +148 -44
  117. endoreg_db/models/media/__init__.py +12 -5
  118. endoreg_db/models/media/frame/__init__.py +1 -1
  119. endoreg_db/models/media/frame/frame.py +34 -8
  120. endoreg_db/models/media/pdf/__init__.py +2 -1
  121. endoreg_db/models/media/pdf/raw_pdf.py +11 -4
  122. endoreg_db/models/media/pdf/report_file.py +6 -2
  123. endoreg_db/models/media/pdf/report_reader/__init__.py +3 -3
  124. endoreg_db/models/media/pdf/report_reader/report_reader_flag.py +15 -5
  125. endoreg_db/models/media/video/create_from_file.py +20 -41
  126. endoreg_db/models/media/video/pipe_1.py +75 -30
  127. endoreg_db/models/media/video/pipe_2.py +37 -12
  128. endoreg_db/models/media/video/video_file.py +36 -24
  129. endoreg_db/models/media/video/video_file_ai.py +235 -70
  130. endoreg_db/models/media/video/video_file_anonymize.py +240 -65
  131. endoreg_db/models/media/video/video_file_frames/_bulk_create_frames.py +6 -1
  132. endoreg_db/models/media/video/video_file_frames/_create_frame_object.py +3 -1
  133. endoreg_db/models/media/video/video_file_frames/_delete_frames.py +30 -9
  134. endoreg_db/models/media/video/video_file_frames/_extract_frames.py +95 -29
  135. endoreg_db/models/media/video/video_file_frames/_get_frame.py +13 -3
  136. endoreg_db/models/media/video/video_file_frames/_get_frame_path.py +4 -1
  137. endoreg_db/models/media/video/video_file_frames/_get_frame_paths.py +15 -3
  138. endoreg_db/models/media/video/video_file_frames/_get_frame_range.py +15 -3
  139. endoreg_db/models/media/video/video_file_frames/_get_frames.py +7 -2
  140. endoreg_db/models/media/video/video_file_frames/_initialize_frames.py +109 -23
  141. endoreg_db/models/media/video/video_file_frames/_manage_frame_range.py +111 -27
  142. endoreg_db/models/media/video/video_file_frames/_mark_frames_extracted_status.py +46 -13
  143. endoreg_db/models/media/video/video_file_io.py +85 -33
  144. endoreg_db/models/media/video/video_file_meta/__init__.py +6 -6
  145. endoreg_db/models/media/video/video_file_meta/get_crop_template.py +17 -4
  146. endoreg_db/models/media/video/video_file_meta/get_endo_roi.py +28 -7
  147. endoreg_db/models/media/video/video_file_meta/get_fps.py +46 -13
  148. endoreg_db/models/media/video/video_file_meta/initialize_video_specs.py +81 -20
  149. endoreg_db/models/media/video/video_file_meta/text_meta.py +61 -20
  150. endoreg_db/models/media/video/video_file_meta/video_meta.py +40 -12
  151. endoreg_db/models/media/video/video_file_segments.py +118 -27
  152. endoreg_db/models/media/video/video_metadata.py +25 -6
  153. endoreg_db/models/media/video/video_processing.py +54 -15
  154. endoreg_db/models/medical/__init__.py +3 -13
  155. endoreg_db/models/medical/contraindication/__init__.py +3 -1
  156. endoreg_db/models/medical/disease.py +18 -6
  157. endoreg_db/models/medical/event.py +6 -2
  158. endoreg_db/models/medical/examination/__init__.py +5 -1
  159. endoreg_db/models/medical/examination/examination.py +22 -6
  160. endoreg_db/models/medical/examination/examination_indication.py +23 -7
  161. endoreg_db/models/medical/examination/examination_time.py +6 -2
  162. endoreg_db/models/medical/finding/__init__.py +3 -1
  163. endoreg_db/models/medical/finding/finding.py +37 -12
  164. endoreg_db/models/medical/finding/finding_classification.py +27 -8
  165. endoreg_db/models/medical/finding/finding_intervention.py +19 -6
  166. endoreg_db/models/medical/finding/finding_type.py +3 -1
  167. endoreg_db/models/medical/hardware/__init__.py +1 -1
  168. endoreg_db/models/medical/hardware/endoscope.py +14 -2
  169. endoreg_db/models/medical/laboratory/__init__.py +1 -1
  170. endoreg_db/models/medical/laboratory/lab_value.py +139 -39
  171. endoreg_db/models/medical/medication/__init__.py +7 -3
  172. endoreg_db/models/medical/medication/medication.py +3 -1
  173. endoreg_db/models/medical/medication/medication_indication.py +3 -1
  174. endoreg_db/models/medical/medication/medication_indication_type.py +11 -3
  175. endoreg_db/models/medical/medication/medication_intake_time.py +3 -1
  176. endoreg_db/models/medical/medication/medication_schedule.py +3 -1
  177. endoreg_db/models/medical/patient/__init__.py +2 -10
  178. endoreg_db/models/medical/patient/medication_examples.py +3 -14
  179. endoreg_db/models/medical/patient/patient_disease.py +17 -5
  180. endoreg_db/models/medical/patient/patient_event.py +12 -4
  181. endoreg_db/models/medical/patient/patient_examination.py +52 -15
  182. endoreg_db/models/medical/patient/patient_examination_indication.py +15 -4
  183. endoreg_db/models/medical/patient/patient_finding.py +105 -29
  184. endoreg_db/models/medical/patient/patient_finding_classification.py +41 -12
  185. endoreg_db/models/medical/patient/patient_finding_intervention.py +11 -3
  186. endoreg_db/models/medical/patient/patient_lab_sample.py +6 -2
  187. endoreg_db/models/medical/patient/patient_lab_value.py +42 -10
  188. endoreg_db/models/medical/patient/patient_medication.py +25 -7
  189. endoreg_db/models/medical/patient/patient_medication_schedule.py +34 -10
  190. endoreg_db/models/metadata/model_meta.py +40 -12
  191. endoreg_db/models/metadata/model_meta_logic.py +51 -16
  192. endoreg_db/models/metadata/sensitive_meta.py +65 -28
  193. endoreg_db/models/metadata/sensitive_meta_logic.py +28 -26
  194. endoreg_db/models/metadata/video_meta.py +146 -39
  195. endoreg_db/models/metadata/video_prediction_logic.py +70 -21
  196. endoreg_db/models/metadata/video_prediction_meta.py +80 -27
  197. endoreg_db/models/operation_log.py +63 -0
  198. endoreg_db/models/other/__init__.py +10 -10
  199. endoreg_db/models/other/distribution/__init__.py +9 -7
  200. endoreg_db/models/other/distribution/base_value_distribution.py +3 -1
  201. endoreg_db/models/other/distribution/date_value_distribution.py +19 -5
  202. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +3 -1
  203. endoreg_db/models/other/distribution/numeric_value_distribution.py +34 -9
  204. endoreg_db/models/other/emission/__init__.py +1 -1
  205. endoreg_db/models/other/emission/emission_factor.py +9 -3
  206. endoreg_db/models/other/information_source.py +15 -5
  207. endoreg_db/models/other/material.py +3 -1
  208. endoreg_db/models/other/transport_route.py +3 -1
  209. endoreg_db/models/other/unit.py +6 -2
  210. endoreg_db/models/report/report.py +0 -1
  211. endoreg_db/models/requirement/requirement.py +84 -27
  212. endoreg_db/models/requirement/requirement_error.py +5 -6
  213. endoreg_db/models/requirement/requirement_evaluation/__init__.py +1 -1
  214. endoreg_db/models/requirement/requirement_evaluation/evaluate_with_dependencies.py +8 -8
  215. endoreg_db/models/requirement/requirement_evaluation/get_values.py +3 -3
  216. endoreg_db/models/requirement/requirement_evaluation/requirement_type_parser.py +24 -8
  217. endoreg_db/models/requirement/requirement_operator.py +28 -8
  218. endoreg_db/models/requirement/requirement_set.py +34 -11
  219. endoreg_db/models/state/__init__.py +1 -0
  220. endoreg_db/models/state/audit_ledger.py +9 -2
  221. endoreg_db/models/{media → state}/processing_history/__init__.py +1 -3
  222. endoreg_db/models/state/processing_history/processing_history.py +136 -0
  223. endoreg_db/models/state/raw_pdf.py +0 -1
  224. endoreg_db/models/state/video.py +2 -4
  225. endoreg_db/models/utils.py +4 -2
  226. endoreg_db/queries/__init__.py +2 -6
  227. endoreg_db/queries/annotations/__init__.py +1 -3
  228. endoreg_db/queries/annotations/legacy.py +37 -26
  229. endoreg_db/root_urls.py +3 -4
  230. endoreg_db/schemas/examination_evaluation.py +3 -0
  231. endoreg_db/serializers/Frames_NICE_and_PARIS_classifications.py +249 -163
  232. endoreg_db/serializers/__init__.py +2 -8
  233. endoreg_db/serializers/administration/__init__.py +1 -2
  234. endoreg_db/serializers/administration/ai/__init__.py +0 -1
  235. endoreg_db/serializers/administration/ai/active_model.py +3 -1
  236. endoreg_db/serializers/administration/ai/ai_model.py +5 -3
  237. endoreg_db/serializers/administration/ai/model_type.py +3 -1
  238. endoreg_db/serializers/administration/center.py +7 -2
  239. endoreg_db/serializers/administration/gender.py +4 -2
  240. endoreg_db/serializers/anonymization.py +13 -13
  241. endoreg_db/serializers/evaluation/examination_evaluation.py +0 -1
  242. endoreg_db/serializers/examination/__init__.py +1 -1
  243. endoreg_db/serializers/examination/base.py +12 -13
  244. endoreg_db/serializers/examination/dropdown.py +6 -7
  245. endoreg_db/serializers/examination_serializer.py +3 -6
  246. endoreg_db/serializers/finding/__init__.py +1 -1
  247. endoreg_db/serializers/finding/finding.py +14 -7
  248. endoreg_db/serializers/finding_classification/__init__.py +3 -3
  249. endoreg_db/serializers/finding_classification/choice.py +3 -3
  250. endoreg_db/serializers/finding_classification/classification.py +2 -4
  251. endoreg_db/serializers/label_video_segment/__init__.py +5 -3
  252. endoreg_db/serializers/{label → label_video_segment}/image_classification_annotation.py +5 -5
  253. endoreg_db/serializers/label_video_segment/label/__init__.py +6 -0
  254. endoreg_db/serializers/{label → label_video_segment/label}/label.py +1 -1
  255. endoreg_db/serializers/label_video_segment/label_video_segment.py +338 -228
  256. endoreg_db/serializers/meta/__init__.py +1 -2
  257. endoreg_db/serializers/meta/sensitive_meta_detail.py +28 -13
  258. endoreg_db/serializers/meta/sensitive_meta_update.py +51 -46
  259. endoreg_db/serializers/meta/sensitive_meta_verification.py +19 -16
  260. endoreg_db/serializers/misc/__init__.py +2 -2
  261. endoreg_db/serializers/misc/file_overview.py +11 -7
  262. endoreg_db/serializers/misc/stats.py +10 -8
  263. endoreg_db/serializers/misc/translatable_field_mix_in.py +6 -6
  264. endoreg_db/serializers/misc/upload_job.py +32 -29
  265. endoreg_db/serializers/patient/__init__.py +2 -1
  266. endoreg_db/serializers/patient/patient.py +32 -15
  267. endoreg_db/serializers/patient/patient_dropdown.py +11 -3
  268. endoreg_db/serializers/patient_examination/__init__.py +1 -1
  269. endoreg_db/serializers/patient_examination/patient_examination.py +67 -40
  270. endoreg_db/serializers/patient_finding/__init__.py +1 -1
  271. endoreg_db/serializers/patient_finding/patient_finding.py +2 -1
  272. endoreg_db/serializers/patient_finding/patient_finding_classification.py +17 -9
  273. endoreg_db/serializers/patient_finding/patient_finding_detail.py +26 -17
  274. endoreg_db/serializers/patient_finding/patient_finding_intervention.py +7 -5
  275. endoreg_db/serializers/patient_finding/patient_finding_list.py +10 -11
  276. endoreg_db/serializers/patient_finding/patient_finding_write.py +36 -27
  277. endoreg_db/serializers/pdf/__init__.py +1 -3
  278. endoreg_db/serializers/requirements/requirement_schema.py +1 -6
  279. endoreg_db/serializers/sensitive_meta_serializer.py +100 -81
  280. endoreg_db/serializers/video/__init__.py +2 -2
  281. endoreg_db/serializers/video/{segmentation.py → video_file.py} +66 -47
  282. endoreg_db/serializers/video/video_file_brief.py +6 -2
  283. endoreg_db/serializers/video/video_file_detail.py +36 -23
  284. endoreg_db/serializers/video/video_file_list.py +4 -2
  285. endoreg_db/serializers/video/video_processing_history.py +54 -50
  286. endoreg_db/services/__init__.py +1 -1
  287. endoreg_db/services/anonymization.py +2 -2
  288. endoreg_db/services/examination_evaluation.py +40 -17
  289. endoreg_db/services/model_meta_from_hf.py +76 -0
  290. endoreg_db/services/polling_coordinator.py +101 -70
  291. endoreg_db/services/pseudonym_service.py +27 -22
  292. endoreg_db/services/report_import.py +6 -3
  293. endoreg_db/services/segment_sync.py +75 -59
  294. endoreg_db/services/video_import.py +6 -7
  295. endoreg_db/urls/__init__.py +2 -2
  296. endoreg_db/urls/ai.py +7 -25
  297. endoreg_db/urls/anonymization.py +61 -15
  298. endoreg_db/urls/auth.py +4 -4
  299. endoreg_db/urls/classification.py +4 -9
  300. endoreg_db/urls/examination.py +27 -18
  301. endoreg_db/urls/media.py +27 -34
  302. endoreg_db/urls/patient.py +11 -7
  303. endoreg_db/urls/requirements.py +3 -1
  304. endoreg_db/urls/root_urls.py +2 -3
  305. endoreg_db/urls/stats.py +24 -16
  306. endoreg_db/urls/upload.py +3 -11
  307. endoreg_db/utils/__init__.py +14 -15
  308. endoreg_db/utils/ai/__init__.py +1 -1
  309. endoreg_db/utils/ai/data_loader_for_model_input.py +262 -0
  310. endoreg_db/utils/ai/data_loader_for_model_training.py +262 -0
  311. endoreg_db/utils/ai/get.py +2 -1
  312. endoreg_db/utils/ai/inference_dataset.py +14 -15
  313. endoreg_db/utils/ai/model_training/config.py +117 -0
  314. endoreg_db/utils/ai/model_training/dataset.py +74 -0
  315. endoreg_db/utils/ai/model_training/losses.py +68 -0
  316. endoreg_db/utils/ai/model_training/metrics.py +78 -0
  317. endoreg_db/utils/ai/model_training/model_backbones.py +155 -0
  318. endoreg_db/utils/ai/model_training/model_gastronet_resnet.py +118 -0
  319. endoreg_db/utils/ai/model_training/trainer_gastronet_multilabel.py +771 -0
  320. endoreg_db/utils/ai/multilabel_classification_net.py +21 -6
  321. endoreg_db/utils/ai/predict.py +4 -4
  322. endoreg_db/utils/ai/preprocess.py +19 -11
  323. endoreg_db/utils/calc_duration_seconds.py +4 -4
  324. endoreg_db/utils/case_generator/lab_sample_factory.py +3 -4
  325. endoreg_db/utils/check_video_files.py +74 -47
  326. endoreg_db/utils/cropping.py +10 -9
  327. endoreg_db/utils/dataloader.py +11 -3
  328. endoreg_db/utils/dates.py +3 -4
  329. endoreg_db/utils/defaults/set_default_center.py +7 -6
  330. endoreg_db/utils/env.py +6 -2
  331. endoreg_db/utils/extract_specific_frames.py +24 -9
  332. endoreg_db/utils/file_operations.py +30 -18
  333. endoreg_db/utils/fix_video_path_direct.py +57 -41
  334. endoreg_db/utils/frame_anonymization_utils.py +157 -157
  335. endoreg_db/utils/hashs.py +3 -18
  336. endoreg_db/utils/links/requirement_link.py +96 -52
  337. endoreg_db/utils/ocr.py +30 -25
  338. endoreg_db/utils/operation_log.py +61 -0
  339. endoreg_db/utils/parse_and_generate_yaml.py +12 -13
  340. endoreg_db/utils/paths.py +6 -6
  341. endoreg_db/utils/permissions.py +40 -24
  342. endoreg_db/utils/pipelines/process_video_dir.py +50 -26
  343. endoreg_db/utils/product/sum_emissions.py +5 -3
  344. endoreg_db/utils/product/sum_weights.py +4 -2
  345. endoreg_db/utils/pydantic_models/__init__.py +3 -4
  346. endoreg_db/utils/requirement_operator_logic/_old/lab_value_operators.py +207 -107
  347. endoreg_db/utils/requirement_operator_logic/_old/model_evaluators.py +252 -65
  348. endoreg_db/utils/requirement_operator_logic/new_operator_logic.py +27 -10
  349. endoreg_db/utils/setup_config.py +21 -5
  350. endoreg_db/utils/storage.py +3 -1
  351. endoreg_db/utils/translation.py +19 -15
  352. endoreg_db/utils/uuid.py +1 -0
  353. endoreg_db/utils/validate_endo_roi.py +12 -4
  354. endoreg_db/utils/validate_subcategory_dict.py +26 -24
  355. endoreg_db/utils/validate_video_detailed.py +207 -149
  356. endoreg_db/utils/video/__init__.py +7 -3
  357. endoreg_db/utils/video/extract_frames.py +30 -18
  358. endoreg_db/utils/video/names.py +11 -6
  359. endoreg_db/utils/video/streaming_processor.py +175 -101
  360. endoreg_db/utils/video/video_splitter.py +30 -19
  361. endoreg_db/views/Frames_NICE_and_PARIS_classifications_views.py +59 -50
  362. endoreg_db/views/__init__.py +0 -20
  363. endoreg_db/views/anonymization/__init__.py +6 -2
  364. endoreg_db/views/anonymization/media_management.py +2 -6
  365. endoreg_db/views/anonymization/overview.py +34 -1
  366. endoreg_db/views/anonymization/validate.py +79 -18
  367. endoreg_db/views/auth/__init__.py +1 -1
  368. endoreg_db/views/auth/keycloak.py +16 -14
  369. endoreg_db/views/examination/__init__.py +12 -15
  370. endoreg_db/views/examination/examination.py +5 -5
  371. endoreg_db/views/examination/examination_manifest_cache.py +5 -5
  372. endoreg_db/views/examination/get_finding_classification_choices.py +8 -5
  373. endoreg_db/views/examination/get_finding_classifications.py +9 -7
  374. endoreg_db/views/examination/get_findings.py +8 -10
  375. endoreg_db/views/examination/get_instruments.py +3 -2
  376. endoreg_db/views/examination/get_interventions.py +1 -1
  377. endoreg_db/views/finding/__init__.py +2 -2
  378. endoreg_db/views/finding/finding.py +58 -54
  379. endoreg_db/views/finding/get_classifications.py +1 -1
  380. endoreg_db/views/finding/get_interventions.py +1 -1
  381. endoreg_db/views/finding_classification/__init__.py +5 -5
  382. endoreg_db/views/finding_classification/finding_classification.py +5 -6
  383. endoreg_db/views/finding_classification/get_classification_choices.py +3 -4
  384. endoreg_db/views/media/__init__.py +13 -13
  385. endoreg_db/views/media/pdf_media.py +9 -9
  386. endoreg_db/views/media/sensitive_metadata.py +10 -7
  387. endoreg_db/views/media/video_media.py +4 -4
  388. endoreg_db/views/meta/__init__.py +1 -1
  389. endoreg_db/views/meta/sensitive_meta_list.py +20 -22
  390. endoreg_db/views/meta/sensitive_meta_verification.py +14 -11
  391. endoreg_db/views/misc/__init__.py +6 -34
  392. endoreg_db/views/misc/center.py +2 -1
  393. endoreg_db/views/misc/csrf.py +2 -1
  394. endoreg_db/views/misc/gender.py +2 -1
  395. endoreg_db/views/misc/stats.py +141 -106
  396. endoreg_db/views/patient/__init__.py +1 -3
  397. endoreg_db/views/patient/patient.py +141 -99
  398. endoreg_db/views/patient_examination/__init__.py +5 -5
  399. endoreg_db/views/patient_examination/patient_examination.py +43 -42
  400. endoreg_db/views/patient_examination/patient_examination_create.py +10 -15
  401. endoreg_db/views/patient_examination/patient_examination_detail.py +12 -15
  402. endoreg_db/views/patient_examination/patient_examination_list.py +21 -17
  403. endoreg_db/views/patient_examination/video.py +114 -80
  404. endoreg_db/views/patient_finding/__init__.py +1 -1
  405. endoreg_db/views/patient_finding/patient_finding.py +17 -10
  406. endoreg_db/views/patient_finding/patient_finding_optimized.py +127 -95
  407. endoreg_db/views/patient_finding_classification/__init__.py +1 -1
  408. endoreg_db/views/patient_finding_classification/pfc_create.py +35 -27
  409. endoreg_db/views/report/reimport.py +1 -1
  410. endoreg_db/views/report/report_stream.py +5 -8
  411. endoreg_db/views/requirement/__init__.py +2 -1
  412. endoreg_db/views/requirement/evaluate.py +7 -9
  413. endoreg_db/views/requirement/lookup.py +2 -3
  414. endoreg_db/views/requirement/lookup_store.py +0 -1
  415. endoreg_db/views/requirement/requirement_utils.py +2 -4
  416. endoreg_db/views/stats/__init__.py +4 -4
  417. endoreg_db/views/stats/stats_views.py +152 -115
  418. endoreg_db/views/video/__init__.py +18 -27
  419. endoreg_db/views/{ai → video/ai}/__init__.py +2 -2
  420. endoreg_db/views/{ai → video/ai}/label.py +20 -16
  421. endoreg_db/views/video/correction.py +5 -6
  422. endoreg_db/views/video/reimport.py +134 -99
  423. endoreg_db/views/video/segments_crud.py +134 -44
  424. endoreg_db/views/video/video_apply_mask.py +13 -12
  425. endoreg_db/views/video/video_correction.py +2 -1
  426. endoreg_db/views/video/video_download_processed.py +15 -15
  427. endoreg_db/views/video/video_meta_stats.py +7 -6
  428. endoreg_db/views/video/video_processing_history.py +3 -2
  429. endoreg_db/views/video/video_remove_frames.py +13 -12
  430. endoreg_db/views/video/video_stream.py +110 -82
  431. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/METADATA +9 -3
  432. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/RECORD +434 -431
  433. endoreg_db/management/commands/import_fallback_video.py +0 -203
  434. endoreg_db/management/commands/import_video.py +0 -422
  435. endoreg_db/management/commands/import_video_with_classification.py +0 -367
  436. endoreg_db/models/media/processing_history/processing_history.py +0 -96
  437. endoreg_db/serializers/label/__init__.py +0 -7
  438. endoreg_db/serializers/label_video_segment/_lvs_create.py +0 -149
  439. endoreg_db/serializers/label_video_segment/_lvs_update.py +0 -138
  440. endoreg_db/serializers/label_video_segment/_lvs_validate.py +0 -149
  441. endoreg_db/serializers/label_video_segment/label_video_segment_annotation.py +0 -99
  442. endoreg_db/serializers/label_video_segment/label_video_segment_update.py +0 -163
  443. endoreg_db/services/__old/pdf_import.py +0 -1487
  444. endoreg_db/services/__old/video_import.py +0 -1306
  445. endoreg_db/tasks/upload_tasks.py +0 -216
  446. endoreg_db/tasks/video_ingest.py +0 -161
  447. endoreg_db/tasks/video_processing_tasks.py +0 -327
  448. endoreg_db/views/misc/translation.py +0 -182
  449. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/WHEEL +0 -0
  450. {endoreg_db-0.8.9.2.dist-info → endoreg_db-0.8.9.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,262 @@
1
+ # endoreg_db/utils/ai/data_loader_for_model_training.py
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import defaultdict
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, TypedDict
8
+
9
+ from django.db import models
10
+
11
+ from endoreg_db.models import (
12
+ AIDataSet,
13
+ Frame,
14
+ ImageClassificationAnnotation,
15
+ Label,
16
+ LabelSet,
17
+ )
18
+
19
+
20
class ImageMultilabelDataset(TypedDict):
    """
    In-memory representation of an image multi-label training dataset.

    This is the type of the dict returned by
    ``build_image_multilabel_dataset_from_db``.

    All per-sample lists are aligned by index, i.e. entry ``i`` of
    ``image_paths``, ``label_vectors``, ``label_masks``, ``frame_ids`` and
    ``old_examination_ids`` describes the same sample:

        image_paths[i]   -> path to image file for sample i
        label_vectors[i] -> list[int|None] of length == len(labels)
        label_masks[i]   -> list[int] of length == len(labels)

    Where:
      - label_vectors[i][j] is:
            1    -> positive annotation (value=True)
            0    -> negative annotation (value=False)
            None -> UNKNOWN (no annotation for that (frame, label))

      - label_masks[i][j] is:
            1 -> this entry participates in the loss (0 or 1 is known)
            0 -> IGNORE in the loss (value was None)

    Column ``j`` of every vector/mask corresponds to ``labels[j]``.
    """

    # Per-sample data (one entry per frame).
    image_paths: List[str]
    label_vectors: List[List[Optional[int]]]
    label_masks: List[List[int]]

    # Column definition: the labels, in vector order, and the LabelSet they
    # were taken from.
    labels: List[Label]
    labelset: LabelSet

    # Provenance of each sample: which DB row it came from and that row's
    # legacy examination id (useful for splitting / logging).
    frame_ids: List[int]  # Frame.pk for each sample
    old_examination_ids: List[Optional[int]]  # may be None if not set
52
+
53
+
54
def _infer_labelset_from_annotations(
    annotations_qs: models.QuerySet[ImageClassificationAnnotation],
) -> LabelSet:
    """
    Try to infer a unique LabelSet from the labels used in the annotations.

    Strategy:
      1. Collect all distinct label_ids from the annotations.
      2. Fetch all Label objects + their label_sets (prefetched in one query).
      3. Compute the intersection of all label_sets across all labels.
      4. If there is exactly ONE common LabelSet, return it.

    Args:
        annotations_qs: annotations whose labels determine the LabelSet.

    Returns:
        The single LabelSet that contains every label used by the annotations.

    Raises:
        ValueError: if the annotations reference no labels at all.
        NotImplementedError: if a label belongs to no LabelSet, or if the
            labels share zero or more than one LabelSet (explicit selection
            is required in those cases).
    """
    label_ids = list(annotations_qs.values_list("label_id", flat=True).distinct())
    if not label_ids:
        raise ValueError("Cannot infer LabelSet: annotations queryset has no labels.")

    labels_qs = Label.objects.filter(id__in=label_ids).prefetch_related("label_sets")

    # Running intersection of LabelSet ids; None until the first label is seen.
    common_ids: Optional[set] = None

    for lbl in labels_qs:
        # lbl.label_sets is the reverse of LabelSet.labels M2M.
        # Use .all() so the prefetched rows are reused; chaining
        # .values_list() would ignore the prefetch cache and hit the DB once
        # per label.
        ls_ids = {ls.id for ls in lbl.label_sets.all()}
        if not ls_ids:
            # This label is not part of any LabelSet -> ambiguous
            raise NotImplementedError(
                f"Label id={lbl.id}, name='{lbl.name}' is not part of any LabelSet. "
                "Explicit LabelSet selection is required."
            )
        common_ids = ls_ids if common_ids is None else common_ids & ls_ids

    if common_ids is None:
        # No Label row matched the collected ids (e.g. only NULL label ids);
        # report it the same way as "no labels" instead of crashing.
        raise ValueError("Cannot infer LabelSet: annotations queryset has no labels.")

    if not common_ids:
        raise NotImplementedError(
            "No common LabelSet across all labels in this AIDataSet. "
            "Please specify a LabelSet explicitly."
        )
    if len(common_ids) > 1:
        raise NotImplementedError(
            "More than one common LabelSet found for the labels in this AIDataSet. "
            "Please specify a LabelSet explicitly to disambiguate."
        )

    ls_id = next(iter(common_ids))
    return LabelSet.objects.get(id=ls_id)
100
+
101
+
102
def build_image_multilabel_dataset_from_db(
    dataset: AIDataSet,
    labelset: Optional[LabelSet] = None,
) -> ImageMultilabelDataset:
    """
    Build an in-memory multilabel dataset for an IMAGE-based AIDataSet.

    Steps:
      1. Take all annotation rows returned by
         ``dataset.get_annotations_queryset()``.
      2. Determine the LabelSet (either explicitly given or inferred).
      3. For each used Frame, build:
           - an image path
           - a label vector (1, 0, or None for each label in LabelSet)
           - a mask vector (1 where known, 0 where unknown)
      4. Return a dict that can be wrapped in a torch/tf Dataset.

    Args:
        dataset: the AIDataSet row to read; must have dataset_type 'image'.
        labelset: LabelSet defining the label columns. If None, it is
            inferred from the labels used by the annotations.

    Returns:
        An ImageMultilabelDataset with one sample per distinct frame, in the
        order in which frames are first encountered in the annotations.

    Raises:
        ValueError: if the dataset is not an image dataset, has no
            annotations, or the LabelSet has no labels.

    NOTE:
      - This function does NOT write anything to the DB.
      - It only reads DB rows and returns Python structures.
      - If a frame has several annotations for the same label, the last one
        iterated wins.
    """
    if dataset.dataset_type != AIDataSet.DATASET_TYPE_IMAGE:
        raise ValueError(
            f"build_image_multilabel_dataset_from_db expected dataset_type='image', "
            f"got '{dataset.dataset_type}' for AIDataSet id={dataset.id}."
        )

    # Get the annotation relation dynamically (for future video/text types)
    annotations_qs = dataset.get_annotations_queryset().select_related("frame", "label")

    # exists() only asks the DB for a single row instead of a full COUNT(*).
    if not annotations_qs.exists():
        raise ValueError(
            f"AIDataSet id={dataset.id} has no annotations attached. "
            "Make sure your import script populated image_annotations."
        )

    # Decide which LabelSet to use
    if labelset is None:
        labelset = _infer_labelset_from_annotations(annotations_qs)

    # Fixed label order (= fixed column order for the label vectors)
    labels_in_order: List[Label] = labelset.get_labels_in_order()
    if not labels_in_order:
        raise ValueError(
            f"LabelSet id={labelset.id}, name='{labelset.name}' has no labels."
        )

    num_labels = len(labels_in_order)
    label_index: Dict[int, int] = {
        lbl.id: idx for idx, lbl in enumerate(labels_in_order)
    }

    # Group annotations by frame. A plain dict keeps insertion order, so
    # frames come out in first-seen order without a separate order list.
    anns_by_frame: Dict[int, List[ImageClassificationAnnotation]] = {}
    for ann in annotations_qs:
        anns_by_frame.setdefault(ann.frame_id, []).append(ann)

    image_paths: List[str] = []
    label_vectors: List[List[Optional[int]]] = []
    label_masks: List[List[int]] = []

    # Id tracking for splitting / logging
    frame_ids: List[int] = []
    old_examination_ids: List[Optional[int]] = []

    for frame_id, frame_annotations in anns_by_frame.items():
        # select_related("frame") already loaded the Frame onto every
        # annotation, so taking it from the first one costs no extra query.
        frame = frame_annotations[0].frame

        frame_ids.append(frame_id)
        old_examination_ids.append(getattr(frame, "old_examination_id", None))

        # Start with unknown for all labels
        vec: List[Optional[int]] = [None] * num_labels

        # Fill with 1/0 where we have annotations
        for ann in frame_annotations:
            idx = label_index.get(ann.label_id)
            if idx is None:
                # Label not part of this LabelSet: ignore
                continue
            vec[idx] = 1 if ann.value else 0

        # Build mask: 1 where vec is known, 0 where unknown
        mask: List[int] = [0 if v is None else 1 for v in vec]

        # Image path as exposed by the Frame model
        file_path: Path = frame.file_path
        image_paths.append(str(file_path))
        label_vectors.append(vec)
        label_masks.append(mask)

    return ImageMultilabelDataset(
        image_paths=image_paths,
        label_vectors=label_vectors,
        label_masks=label_masks,
        labels=labels_in_order,
        labelset=labelset,
        frame_ids=frame_ids,
        old_examination_ids=old_examination_ids,
    )
218
+
219
+
220
def build_dataset_for_training(
    dataset: AIDataSet,
    labelset: Optional[LabelSet] = None,
):
    """
    Dispatch an AIDataSet row to the matching in-memory dataset builder.

    The builder is selected from the pair
    ``(dataset.dataset_type, dataset.ai_model_type)``.

    Currently supported:
      - dataset_type = "image" with
        ai_model_type = "image_multilabel_classification"

    Args:
        dataset: the AIDataSet row describing what to build.
        labelset: optional LabelSet forwarded to the builder.

    Returns:
        Whatever the selected builder returns (for the image multilabel
        case, the result of build_image_multilabel_dataset_from_db).

    Raises:
        NotImplementedError: if no builder exists for the dataset's
            type / model-type combination.
    """
    is_image_multilabel = (
        dataset.dataset_type == AIDataSet.DATASET_TYPE_IMAGE
        and dataset.ai_model_type == AIDataSet.AI_MODEL_TYPE_IMAGE_MULTILABEL
    )

    # Further combinations (e.g. video segmentation, text classification)
    # would be dispatched here, each to its own builder, before giving up.
    if not is_image_multilabel:
        raise NotImplementedError(
            "No dataset builder implemented for "
            f"dataset_type='{dataset.dataset_type}', "
            f"ai_model_type='{dataset.ai_model_type}'."
        )

    return build_image_multilabel_dataset_from_db(dataset, labelset=labelset)
@@ -1,5 +1,6 @@
1
1
  from endoreg_db.models import ModelMeta
2
2
 
3
+
3
4
def get_latest_model_meta_by_model_name(model_name):
    """
    Return the ModelMeta row named ``model_name`` that sorts first when
    ordered by ``version`` descending, or None if no row has that name.
    """
    matching = ModelMeta.objects.filter(name=model_name)
    return matching.order_by("-version").first()
@@ -4,6 +4,7 @@ from PIL import Image
4
4
  from torchvision import transforms
5
5
  from .preprocess import Cropper
6
6
 
7
+
7
8
  class InferenceDataset(Dataset):
8
9
  def __init__(self, paths, crops, config):
9
10
  self.paths = paths
@@ -12,13 +13,15 @@ class InferenceDataset(Dataset):
12
13
  self.config = config
13
14
 
14
15
  # Initialize the image transformations using torchvision
15
- self.transforms = transforms.Compose([
16
- # Convert PIL image to PyTorch tensor
17
- transforms.ToTensor(),
18
- # Normalize the image using the provided mean and std
19
- transforms.Normalize(mean=self.config["mean"], std=self.config["std"])
20
- ])
21
-
16
+ self.transforms = transforms.Compose(
17
+ [
18
+ # Convert PIL image to PyTorch tensor
19
+ transforms.ToTensor(),
20
+ # Normalize the image using the provided mean and std
21
+ transforms.Normalize(mean=self.config["mean"], std=self.config["std"]),
22
+ ]
23
+ )
24
+
22
25
  def __len__(self):
23
26
  # Returns the total number of samples
24
27
  return len(self.paths)
@@ -27,7 +30,7 @@ class InferenceDataset(Dataset):
27
30
  # Open the image with Pillow
28
31
  with Image.open(self.paths[idx]) as pil_image:
29
32
  # Convert the image to RGB to ensure 3 channels
30
- pil_image = pil_image.convert('RGB')
33
+ pil_image = pil_image.convert("RGB")
31
34
 
32
35
  # Get the corresponding crop for the current image
33
36
  crop = self.crops[idx]
@@ -36,17 +39,13 @@ class InferenceDataset(Dataset):
36
39
  cropped = self.cropper(
37
40
  np.array(pil_image), # Convert PIL image to numpy array for cropping
38
41
  crop,
39
- scale=[
40
- self.config["size_x"],
41
- self.config["size_y"]
42
- ]
42
+ scale=[self.config["size_x"], self.config["size_y"]],
43
43
  )
44
44
 
45
45
  # Convert cropped numpy array back to PIL image for torchvision transforms
46
- cropped_pil = Image.fromarray(cropped.astype('uint8'), 'RGB')
47
-
46
+ cropped_pil = Image.fromarray(cropped.astype("uint8"), "RGB")
47
+
48
48
  # Apply the transformations
49
49
  img = self.transforms(cropped_pil)
50
50
 
51
-
52
51
  return img
@@ -0,0 +1,117 @@
1
+ # endoreg_db/utils/ai/model_training/config.py
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from django.conf import settings
10
+
11
+
12
+ # ---------------------------------------------------------------------
13
+ # PATHS
14
+ # ---------------------------------------------------------------------
15
+
16
# Base project directory (e.g. /home/admin/dev/endoreg-db).
# Taken from settings.BASE_DIR when that attribute exists; otherwise falls
# back to parents[4] of this file's resolved path.
# NOTE(review): reading `settings` here presumably requires Django settings
# to be configured before this module is imported — confirm.
BASE_DIR = Path(getattr(settings, "BASE_DIR", Path(__file__).resolve().parents[4]))

# All training artifacts go here:
# /home/admin/dev/endoreg-db/data/model_training/
TRAINING_ROOT = BASE_DIR / "data" / "model_training"
CHECKPOINTS_DIR = TRAINING_ROOT / "checkpoints"
RUNS_DIR = TRAINING_ROOT / "runs"

# Import-time side effect: make sure the artifact directories exist
# (missing parents are created, existing directories are left alone).
for d in (TRAINING_ROOT, CHECKPOINTS_DIR, RUNS_DIR):
    d.mkdir(parents=True, exist_ok=True)

# Which LabelSet.version we train on (for label filtering)
DEFAULT_LABELSET_VERSION_TO_TRAIN: int = 2
30
+
31
+
32
+ # ---------------------------------------------------------------------
33
+ # TRAINING CONFIG
34
+ # ---------------------------------------------------------------------
35
+
36
+
37
@dataclass
class TrainingConfig:
    """
    Configuration for GastroNet multi-label training.

    Only ``dataset_id`` is required; every other field has a default.

    Most important knobs:
      - dataset_id: which AIDataSet row to use from the database
      - labelset_version_to_train: only labels belonging to
        LabelSet.version == this are used for training (e.g. 2).
      - treat_unlabeled_as_negative:
            True  -> Option A: for v2 labels, if not annotated in a frame, we
                     assume "absent" (0) and include it in the loss.
            False -> keep "unknown" semantics (mask = 0, ignored in
                     loss/metrics).

    Learning rate schedule:
      - lr_head / lr_backbone: base learning rates
      - use_scheduler: if True, we use warm-up + cosine decay
      - warmup_epochs: how many epochs to linearly increase LR from 0 to
        the base LR
      - min_lr: lowest LR reached at the end of cosine schedule

    NOTE(review): this class only stores values; how each field is consumed
    is defined by the training code, not here — confirm semantics there.
    """

    # --- WHAT TO TRAIN ON -------------------------------------------------
    dataset_id: int

    # Train only on labels belonging to ANY LabelSet with this version.
    labelset_version_to_train: int = DEFAULT_LABELSET_VERSION_TO_TRAIN

    # Path to GastroNet RN50 checkpoint (.pth); if None, backbone is random.
    backbone_checkpoint: Optional[str] = None

    # --- EPOCHS / BATCHING -----------------------------------------------
    num_epochs: int = 5
    batch_size: int = 32

    # Split ratios (by colonoscopy exam groups, not by individual frames)
    val_split: float = 0.2
    test_split: float = 0.1

    # --- LEARNING RATES --------------------------------------------------
    # Base learning rates for classifier head and backbone.
    lr_head: float = 1e-3  # usually larger (newly initialized layer)
    lr_backbone: float = 1e-4  # smaller (pretrained GastroNet backbone)

    # --- FOCAL LOSS HYPERPARAMETERS -------------------------------------
    gamma_focal: float = 2.0  # how strongly to focus on hard examples
    alpha_focal: float = 0.25  # weight for positives vs negatives

    # --- DEVICE & SEED ---------------------------------------------------
    device: str = "auto"  # "auto", "cpu", or "cuda"
    random_seed: int = 42

    # --- LABEL SEMANTICS -------------------------------------------------
    # For the filtered labels (LabelSet.version == labelset_version_to_train):
    #   True  -> Option A: unlabeled => negative (0) and mask=1 (supervised)
    #   False -> keep unlabeled as unknown (mask=0, ignored)
    treat_unlabeled_as_negative: bool = True

    # --- LR SCHEDULER: WARM-UP + COSINE DECAY ----------------------------
    # If True, we apply:
    #   - linear warm-up for 'warmup_epochs'
    #   - then CosineAnnealingLR for the remaining epochs
    use_scheduler: bool = True

    # Number of warm-up epochs (can be 0 for "no warm-up").
    warmup_epochs: int = 3

    # Minimum learning rate at the end of cosine decay for all param groups.
    # (Both head and backbone decay towards this value.)
    min_lr: float = 1e-6

    # --- BACKBONE --------------------------------------------------------
    # Which CNN backbone / weights to use:
    #   "gastro_rn50"       -> ResNet50 + GastroNet checkpoint
    #   "resnet50_imagenet" -> ResNet50 with ImageNet weights
    #   "resnet50_random"   -> ResNet50 with random initialization
    #   (later) "efficientnet_b0_imagenet", etc.
    backbone_name: str = "gastro_rn50"

    # Whether to freeze backbone (feature extractor)
    freeze_backbone: bool = True
@@ -0,0 +1,74 @@
1
+ # endoreg_db/utils/ai/model_training/dataset.py
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional, Sequence, Tuple, List
6
+
7
+ import numpy as np
8
+ from PIL import Image
9
+ import torch
10
+ from torch.utils.data import Dataset
11
+
12
+
13
class EndoMultiLabelDataset(Dataset):
    """
    PyTorch dataset wrapping the output of build_dataset_for_training.

    Each item is a tuple ``(image_tensor, labels, mask)``:
        image_tensor: FloatTensor [3, H, W], normalized with ImageNet stats
        labels:       FloatTensor [num_labels] (0 or 1; unknown -> 0 but masked)
        mask:         FloatTensor [num_labels] (1 known, 0 unknown)

    Images are loaded lazily from disk in ``__getitem__``; labels and masks
    are converted to tensors once in ``__init__``.
    """

    def __init__(
        self,
        image_paths: Sequence[str],
        label_vectors: Sequence[Sequence[Optional[int]]],
        label_masks: Sequence[Sequence[int]],
        image_size: int = 224,
    ) -> None:
        """
        Args:
            image_paths: one image file path per sample.
            label_vectors: per-sample label values (1, 0 or None = unknown).
            label_masks: per-sample masks (1 = known, 0 = unknown).
            image_size: images are resized to (image_size, image_size).

        All three sequences must have the same length. An empty dataset is
        allowed and yields ``num_labels == 0``.
        """
        assert len(image_paths) == len(label_vectors) == len(label_masks), (
            "image_paths, label_vectors, label_masks must have same length"
        )

        self.image_paths: List[str] = list(image_paths)

        # Convert vectors with None -> 0; the mask ensures they are ignored.
        label_rows = [
            [0 if x is None else int(x) for x in vec] for vec in label_vectors
        ]
        mask_rows = [[int(x) for x in mask] for mask in label_masks]

        if label_rows:
            self.labels = torch.tensor(label_rows, dtype=torch.float32)  # [N, C]
            self.masks = torch.tensor(mask_rows, dtype=torch.float32)  # [N, C]
        else:
            # torch.tensor([]) would be 1-D and break the [N, C] contract
            # (and the shape[1] lookup below), so build an explicit 0x0 tensor.
            self.labels = torch.zeros((0, 0), dtype=torch.float32)
            self.masks = torch.zeros((0, 0), dtype=torch.float32)

        self.num_labels = self.labels.shape[1]
        self.image_size = image_size

        # ImageNet-style normalization, shaped to broadcast over [3, H, W]
        self.mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.image_paths)

    def _load_image(self, path: str) -> torch.Tensor:
        """
        Load image from disk, resize, convert to normalized tensor [3, H, W].
        """
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted, so long runs do not leak handles.
        with Image.open(path) as raw:
            img = raw.convert("RGB").resize((self.image_size, self.image_size))
        arr = np.array(img, dtype=np.float32) / 255.0  # [H, W, C]
        tensor = torch.from_numpy(arr).permute(2, 0, 1)  # [C, H, W]
        tensor = (tensor - self.mean) / self.std
        return tensor

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return ``(image_tensor, labels, mask)`` for sample ``idx``."""
        path = self.image_paths[idx]
        x = self._load_image(path)
        y = self.labels[idx]
        m = self.masks[idx]
        return x, y, m
@@ -0,0 +1,68 @@
1
+ # endoreg_db/utils/ai/model_training/losses.py
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ import torch
8
+
9
+
10
def compute_class_weights(
    labels: torch.Tensor,
    masks: torch.Tensor,
    eps: float = 1e-6,
) -> torch.Tensor:
    """
    Derive one weight per label from how often that label is positive.

    Args:
        labels: [N, C] tensor with values in {0, 1}.
        masks:  [N, C] tensor with values in {0, 1}; 1 = known, 0 = unknown.
        eps: small constant that keeps the divisions finite.

    Returns:
        [C] tensor of weights ``1 / (positives_j + eps)``, rescaled so that
        their mean is approximately 1. Only entries whose mask is above 0.5
        count as positives.
    """
    is_known = masks > 0.5

    # Number of known positive samples for each label column.  [C]
    positives_per_label = torch.sum(labels * is_known, dim=0)

    inverse_frequency = 1.0 / (positives_per_label + eps)
    scale = torch.clamp(inverse_frequency.mean(), min=eps)
    return inverse_frequency / scale  # [C]
30
+
31
+
32
def focal_loss_with_mask(
    logits: torch.Tensor,
    targets: torch.Tensor,
    masks: torch.Tensor,
    class_weights: Optional[torch.Tensor] = None,
    alpha: float = 0.25,
    gamma: float = 2.0,
    eps: float = 1e-6,
) -> torch.Tensor:
    """
    Masked multi-label focal loss with optional per-label weighting.

    Args:
        logits:  [B, C] raw model outputs.
        targets: [B, C] ground truth, 0/1.
        masks:   [B, C] 1 = known, 0 = unknown (unknown entries add no loss).
        class_weights: [C] per-label multipliers, or None for no weighting.
        alpha: weight given to positive targets; negatives get ``1 - alpha``.
        gamma: focusing exponent applied to ``1 - p_t``.
        eps: probabilities are clamped to ``[eps, 1 - eps]`` before the log.

    Returns:
        Scalar tensor: the summed loss over known entries divided by the
        number of known entries (the divisor is at least 1).
    """
    probabilities = torch.clamp(torch.sigmoid(logits), eps, 1.0 - eps)  # [B, C]
    negatives = 1.0 - targets

    # Probability assigned to the true class: p where y=1, (1-p) where y=0.
    p_true = probabilities * targets + (1.0 - probabilities) * negatives

    balance = alpha * targets + (1.0 - alpha) * negatives
    modulator = torch.pow(1.0 - p_true, gamma)

    per_entry = -balance * modulator * torch.log(p_true)  # [B, C]

    if class_weights is not None:
        per_entry = per_entry * class_weights.view(1, -1)

    # Zero out entries whose label is unknown.
    per_entry = per_entry * masks

    known_count = torch.clamp(masks.sum(), min=1.0)
    return per_entry.sum() / known_count
@@ -0,0 +1,78 @@
1
+ # endoreg_db/utils/ai/model_training/metrics.py
2
+ from __future__ import annotations
3
+ import torch
4
+
5
+
6
def compute_metrics(logits, targets, masks, threshold=0.5):
    """
    Compute masked multi-label classification metrics.

    Args:
        logits:  [N, C] raw model outputs; sigmoid is applied internally.
        targets: [N, C] ground truth, 0/1.
        masks:   [N, C] 1 = known, 0 = unknown. Unknown entries are excluded
                 from every count, including true negatives.
        threshold: probability at or above which a label is predicted positive.

    Returns:
        dict with:
          - "precision", "recall", "f1", "accuracy": global (micro) metrics
          - "tp", "fp", "tn", "fn": global counts over known entries
          - "per_label": list of C dicts with "precision", "recall", "f1"
            and "support" (number of known positives); the three metrics are
            None and support is 0 for a label with no known entries.
    """
    probs = torch.sigmoid(logits)
    preds = (probs >= threshold).int()
    targets = targets.int()
    masks = masks.int()

    # Only evaluate where mask == 1
    preds = preds * masks
    targets = targets * masks

    tp = (preds * targets).sum().item()
    fp = (preds * (1 - targets)).sum().item()
    fn = ((1 - preds) * targets).sum().item()
    # Masked-out entries have pred == target == 0 after the masking above, so
    # they would look like true negatives; multiply by the mask to drop them.
    tn = ((1 - preds) * (1 - targets) * masks).sum().item()

    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)
    f1 = 2 * precision * recall / (precision + recall + 1e-6)
    accuracy = (tp + tn) / (tp + tn + fp + fn + 1e-6)

    # ------- PER-LABEL METRICS -------
    per_label = []
    num_labels = targets.shape[1]

    for j in range(num_labels):
        t = targets[:, j]
        p = preds[:, j]
        m = masks[:, j]

        # consider only known labels
        valid_idx = m == 1
        if valid_idx.sum() == 0:
            per_label.append(
                {"precision": None, "recall": None, "f1": None, "support": 0}
            )
            continue

        t = t[valid_idx]
        p = p[valid_idx]

        tp_j = ((p == 1) & (t == 1)).sum().item()
        fp_j = ((p == 1) & (t == 0)).sum().item()
        fn_j = ((p == 0) & (t == 1)).sum().item()

        precision_j = tp_j / (tp_j + fp_j + 1e-6)
        recall_j = tp_j / (tp_j + fn_j + 1e-6)
        f1_j = 2 * precision_j * recall_j / (precision_j + recall_j + 1e-6)

        per_label.append(
            {
                "precision": precision_j,
                "recall": recall_j,
                "f1": f1_j,
                "support": t.sum().item(),
            }
        )

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "accuracy": accuracy,
        "tp": tp,
        "fp": fp,
        "tn": tn,
        "fn": fn,
        "per_label": per_label,
    }