endoreg-db 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.

Note: this version of endoreg-db has been flagged as potentially problematic.

Files changed (268)
  1. endoreg_db/admin.py +90 -1
  2. endoreg_db/case_generator/case_generator.py +159 -0
  3. endoreg_db/case_generator/lab_sample_factory.py +33 -0
  4. endoreg_db/case_generator/utils.py +30 -0
  5. endoreg_db/data/__init__.py +50 -4
  6. endoreg_db/data/ai_model/data.yaml +7 -0
  7. endoreg_db/data/{label → ai_model_label}/label/data.yaml +27 -1
  8. endoreg_db/data/ai_model_label/label-set/data.yaml +21 -0
  9. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +5 -0
  10. endoreg_db/data/ai_model_video_segmentation_label/base_segmentation.yaml +176 -0
  11. endoreg_db/data/ai_model_video_segmentation_labelset/data.yaml +20 -0
  12. endoreg_db/data/center/data.yaml +35 -5
  13. endoreg_db/data/contraindication/bleeding.yaml +11 -0
  14. endoreg_db/data/distribution/numeric/data.yaml +14 -0
  15. endoreg_db/data/endoscope/data.yaml +93 -0
  16. endoreg_db/data/examination_indication/endoscopy.yaml +8 -0
  17. endoreg_db/data/examination_indication_classification/endoscopy.yaml +8 -0
  18. endoreg_db/data/examination_indication_classification_choice/endoscopy.yaml +101 -0
  19. endoreg_db/data/finding/data.yaml +141 -0
  20. endoreg_db/data/finding_intervention/endoscopy.yaml +138 -0
  21. endoreg_db/data/finding_intervention_type/endoscopy.yaml +15 -0
  22. endoreg_db/data/finding_location_classification/colonoscopy.yaml +46 -0
  23. endoreg_db/data/finding_location_classification_choice/colonoscopy.yaml +240 -0
  24. endoreg_db/data/finding_morphology_classification/colonoscopy.yaml +48 -0
  25. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_circularity_default.yaml +34 -0
  26. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_nice.yaml +20 -0
  27. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_paris.yaml +65 -0
  28. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_planarity_default.yaml +56 -0
  29. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_surface_intact_default.yaml +39 -0
  30. endoreg_db/data/finding_morphology_classification_choice/colonoscopy_size.yaml +57 -0
  31. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +79 -0
  32. endoreg_db/data/finding_type/data.yaml +30 -0
  33. endoreg_db/data/gender/data.yaml +17 -0
  34. endoreg_db/data/lab_value/cardiac_enzymes.yaml +7 -1
  35. endoreg_db/data/lab_value/coagulation.yaml +6 -1
  36. endoreg_db/data/lab_value/electrolytes.yaml +39 -1
  37. endoreg_db/data/lab_value/gastrointestinal_function.yaml +12 -0
  38. endoreg_db/data/lab_value/hematology.yaml +17 -2
  39. endoreg_db/data/lab_value/hormones.yaml +6 -0
  40. endoreg_db/data/lab_value/lipids.yaml +12 -3
  41. endoreg_db/data/lab_value/misc.yaml +5 -2
  42. endoreg_db/data/lab_value/renal_function.yaml +2 -1
  43. endoreg_db/data/lx_client_tag/base.yaml +54 -0
  44. endoreg_db/data/lx_client_type/base.yaml +30 -0
  45. endoreg_db/data/lx_permission/base.yaml +24 -0
  46. endoreg_db/data/lx_permission/endoreg.yaml +52 -0
  47. endoreg_db/data/medication_indication/anticoagulation.yaml +44 -49
  48. endoreg_db/data/names_first/first_names.yaml +51 -0
  49. endoreg_db/data/names_last/last_names.yaml +51 -0
  50. endoreg_db/data/network_device/data.yaml +30 -0
  51. endoreg_db/data/organ/data.yaml +29 -0
  52. endoreg_db/data/pdf_type/data.yaml +2 -1
  53. endoreg_db/data/report_reader_flag/ukw-examination-generic.yaml +4 -0
  54. endoreg_db/forms/__init__.py +3 -1
  55. endoreg_db/forms/examination_form.py +11 -0
  56. endoreg_db/forms/patient_finding_intervention_form.py +19 -0
  57. endoreg_db/forms/patient_form.py +26 -0
  58. endoreg_db/management/commands/__init__.py +0 -0
  59. endoreg_db/management/commands/load_ai_model_data.py +57 -23
  60. endoreg_db/management/commands/load_ai_model_label_data.py +59 -0
  61. endoreg_db/management/commands/load_base_db_data.py +160 -118
  62. endoreg_db/management/commands/{load_endoscope_type_data.py → load_contraindication_data.py} +3 -7
  63. endoreg_db/management/commands/load_disease_data.py +29 -7
  64. endoreg_db/management/commands/load_endoscope_data.py +68 -0
  65. endoreg_db/management/commands/load_examination_indication_data.py +65 -0
  66. endoreg_db/management/commands/load_finding_data.py +171 -0
  67. endoreg_db/management/commands/load_lab_value_data.py +3 -3
  68. endoreg_db/management/commands/load_lx_data.py +64 -0
  69. endoreg_db/management/commands/load_medication_data.py +83 -21
  70. endoreg_db/management/commands/load_name_data.py +37 -0
  71. endoreg_db/management/commands/{load_endoscopy_processor_data.py → load_organ_data.py} +7 -9
  72. endoreg_db/migrations/0001_initial.py +1206 -728
  73. endoreg_db/migrations/0002_alter_frame_image_alter_rawframe_image.py +23 -0
  74. endoreg_db/migrations/0003_alter_frame_image_alter_rawframe_image.py +23 -0
  75. endoreg_db/migrations/0004_alter_rawvideofile_file_alter_video_file.py +25 -0
  76. endoreg_db/migrations/0005_rawvideofile_frame_count_and_more.py +33 -0
  77. endoreg_db/migrations/0006_frame_extracted_rawframe_extracted.py +23 -0
  78. endoreg_db/migrations/0007_rename_pseudo_patient_video_patient_and_more.py +24 -0
  79. endoreg_db/migrations/0008_remove_reportfile_patient_examination_and_more.py +48 -0
  80. endoreg_db/models/__init__.py +331 -28
  81. endoreg_db/models/ai_model/__init__.py +1 -0
  82. endoreg_db/models/ai_model/ai_model.py +103 -0
  83. endoreg_db/models/ai_model/lightning/__init__.py +3 -0
  84. endoreg_db/models/ai_model/lightning/inference_dataset.py +53 -0
  85. endoreg_db/models/ai_model/lightning/multilabel_classification_net.py +155 -0
  86. endoreg_db/models/ai_model/lightning/postprocess.py +53 -0
  87. endoreg_db/models/ai_model/lightning/predict.py +172 -0
  88. endoreg_db/models/ai_model/lightning/prediction_visualizer.py +55 -0
  89. endoreg_db/models/ai_model/lightning/preprocess.py +68 -0
  90. endoreg_db/models/ai_model/lightning/run_visualizer.py +21 -0
  91. endoreg_db/models/ai_model/model_meta.py +232 -6
  92. endoreg_db/models/ai_model/model_type.py +13 -3
  93. endoreg_db/models/annotation/__init__.py +31 -2
  94. endoreg_db/models/annotation/anonymized_image_annotation.py +73 -18
  95. endoreg_db/models/annotation/binary_classification_annotation_task.py +94 -57
  96. endoreg_db/models/annotation/image_classification.py +73 -14
  97. endoreg_db/models/annotation/video_segmentation_annotation.py +52 -0
  98. endoreg_db/models/annotation/video_segmentation_labelset.py +20 -0
  99. endoreg_db/models/case/__init__.py +1 -0
  100. endoreg_db/models/{persons/patient/case → case}/case.py +4 -0
  101. endoreg_db/models/case_template/__init__.py +10 -1
  102. endoreg_db/models/case_template/case_template.py +57 -13
  103. endoreg_db/models/case_template/case_template_rule.py +5 -5
  104. endoreg_db/models/case_template/case_template_rule_value.py +19 -4
  105. endoreg_db/models/center/__init__.py +7 -0
  106. endoreg_db/models/center/center.py +31 -5
  107. endoreg_db/models/center/center_product.py +0 -1
  108. endoreg_db/models/center/center_resource.py +16 -2
  109. endoreg_db/models/center/center_waste.py +6 -1
  110. endoreg_db/models/contraindication/__init__.py +21 -0
  111. endoreg_db/models/data_file/__init__.py +38 -5
  112. endoreg_db/models/data_file/base_classes/__init__.py +6 -1
  113. endoreg_db/models/data_file/base_classes/abstract_frame.py +64 -15
  114. endoreg_db/models/data_file/base_classes/abstract_pdf.py +136 -0
  115. endoreg_db/models/data_file/base_classes/abstract_video.py +744 -138
  116. endoreg_db/models/data_file/base_classes/frame_helpers.py +17 -0
  117. endoreg_db/models/data_file/base_classes/prepare_bulk_frames.py +19 -0
  118. endoreg_db/models/data_file/base_classes/utils.py +80 -0
  119. endoreg_db/models/data_file/frame.py +22 -38
  120. endoreg_db/models/data_file/import_classes/__init__.py +4 -18
  121. endoreg_db/models/data_file/import_classes/raw_pdf.py +162 -90
  122. endoreg_db/models/data_file/import_classes/raw_video.py +239 -294
  123. endoreg_db/models/data_file/metadata/__init__.py +10 -0
  124. endoreg_db/models/data_file/metadata/pdf_meta.py +4 -0
  125. endoreg_db/models/data_file/metadata/sensitive_meta.py +265 -6
  126. endoreg_db/models/data_file/metadata/video_meta.py +116 -50
  127. endoreg_db/models/data_file/report_file.py +30 -63
  128. endoreg_db/models/data_file/video/__init__.py +6 -2
  129. endoreg_db/models/data_file/video/video.py +187 -16
  130. endoreg_db/models/data_file/video_segment.py +162 -55
  131. endoreg_db/models/disease.py +25 -2
  132. endoreg_db/models/emission/__init__.py +5 -1
  133. endoreg_db/models/emission/emission_factor.py +71 -6
  134. endoreg_db/models/event.py +51 -0
  135. endoreg_db/models/examination/__init__.py +6 -1
  136. endoreg_db/models/examination/examination.py +53 -12
  137. endoreg_db/models/examination/examination_indication.py +170 -0
  138. endoreg_db/models/examination/examination_time.py +31 -5
  139. endoreg_db/models/examination/examination_time_type.py +28 -4
  140. endoreg_db/models/examination/examination_type.py +28 -6
  141. endoreg_db/models/finding/__init__.py +11 -0
  142. endoreg_db/models/finding/finding.py +75 -0
  143. endoreg_db/models/finding/finding_intervention.py +60 -0
  144. endoreg_db/models/finding/finding_location_classification.py +94 -0
  145. endoreg_db/models/finding/finding_morphology_classification.py +89 -0
  146. endoreg_db/models/finding/finding_type.py +22 -0
  147. endoreg_db/models/hardware/endoscope.py +16 -0
  148. endoreg_db/models/hardware/endoscopy_processor.py +31 -19
  149. endoreg_db/models/label/label.py +35 -7
  150. endoreg_db/models/laboratory/lab_value.py +12 -3
  151. endoreg_db/models/logging/__init__.py +8 -1
  152. endoreg_db/models/lx/__init__.py +4 -0
  153. endoreg_db/models/lx/client.py +57 -0
  154. endoreg_db/models/lx/identity.py +34 -0
  155. endoreg_db/models/lx/permission.py +18 -0
  156. endoreg_db/models/lx/user.py +16 -0
  157. endoreg_db/models/medication/__init__.py +19 -1
  158. endoreg_db/models/medication/medication.py +7 -122
  159. endoreg_db/models/medication/medication_indication.py +50 -0
  160. endoreg_db/models/medication/medication_indication_type.py +34 -0
  161. endoreg_db/models/medication/medication_intake_time.py +26 -0
  162. endoreg_db/models/medication/medication_schedule.py +37 -0
  163. endoreg_db/models/network/__init__.py +7 -1
  164. endoreg_db/models/network/network_device.py +13 -8
  165. endoreg_db/models/organ/__init__.py +38 -0
  166. endoreg_db/models/other/__init__.py +19 -1
  167. endoreg_db/models/other/distribution/__init__.py +44 -0
  168. endoreg_db/models/other/distribution/base_value_distribution.py +20 -0
  169. endoreg_db/models/other/distribution/date_value_distribution.py +91 -0
  170. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +32 -0
  171. endoreg_db/models/other/distribution/numeric_value_distribution.py +97 -0
  172. endoreg_db/models/other/distribution/single_categorical_value_distribution.py +22 -0
  173. endoreg_db/models/other/distribution.py +1 -211
  174. endoreg_db/models/other/material.py +4 -0
  175. endoreg_db/models/other/transport_route.py +2 -1
  176. endoreg_db/models/patient/__init__.py +24 -0
  177. endoreg_db/models/patient/patient_examination.py +182 -0
  178. endoreg_db/models/patient/patient_finding.py +143 -0
  179. endoreg_db/models/patient/patient_finding_intervention.py +26 -0
  180. endoreg_db/models/patient/patient_finding_location.py +120 -0
  181. endoreg_db/models/patient/patient_finding_morphology.py +166 -0
  182. endoreg_db/models/persons/__init__.py +29 -2
  183. endoreg_db/models/persons/examiner/examiner.py +48 -4
  184. endoreg_db/models/persons/patient/__init__.py +1 -1
  185. endoreg_db/models/persons/patient/patient.py +227 -54
  186. endoreg_db/models/persons/patient/patient_disease.py +6 -0
  187. endoreg_db/models/persons/patient/patient_event.py +31 -1
  188. endoreg_db/models/persons/patient/patient_examination_indication.py +32 -0
  189. endoreg_db/models/persons/patient/patient_lab_sample.py +4 -2
  190. endoreg_db/models/persons/patient/patient_lab_value.py +37 -16
  191. endoreg_db/models/persons/patient/patient_medication.py +27 -12
  192. endoreg_db/models/persons/patient/patient_medication_schedule.py +62 -2
  193. endoreg_db/models/prediction/__init__.py +7 -1
  194. endoreg_db/models/prediction/image_classification.py +20 -6
  195. endoreg_db/models/prediction/video_prediction_meta.py +151 -89
  196. endoreg_db/models/product/__init__.py +10 -1
  197. endoreg_db/models/product/product.py +15 -2
  198. endoreg_db/models/product/product_group.py +8 -0
  199. endoreg_db/models/product/product_material.py +4 -0
  200. endoreg_db/models/product/product_weight.py +12 -0
  201. endoreg_db/models/product/reference_product.py +19 -3
  202. endoreg_db/models/quiz/__init__.py +8 -1
  203. endoreg_db/models/report_reader/__init__.py +6 -1
  204. endoreg_db/serializers/__init__.py +1 -1
  205. endoreg_db/serializers/annotation.py +2 -5
  206. endoreg_db/serializers/frame.py +1 -5
  207. endoreg_db/serializers/patient.py +26 -3
  208. endoreg_db/serializers/prediction.py +2 -7
  209. endoreg_db/serializers/raw_video_meta_validation.py +13 -0
  210. endoreg_db/serializers/video.py +6 -13
  211. endoreg_db/serializers/video_segmentation.py +492 -0
  212. endoreg_db/templates/admin/patient_finding_intervention.html +253 -0
  213. endoreg_db/templates/admin/start_examination.html +12 -0
  214. endoreg_db/templates/timeline.html +176 -0
  215. endoreg_db/urls.py +173 -0
  216. endoreg_db/utils/__init__.py +36 -1
  217. endoreg_db/utils/dataloader.py +45 -19
  218. endoreg_db/utils/dates.py +39 -0
  219. endoreg_db/utils/hashs.py +122 -4
  220. endoreg_db/utils/names.py +74 -0
  221. endoreg_db/utils/parse_and_generate_yaml.py +46 -0
  222. endoreg_db/utils/pydantic_models/__init__.py +6 -0
  223. endoreg_db/utils/pydantic_models/db_config.py +57 -0
  224. endoreg_db/utils/validate_endo_roi.py +19 -0
  225. endoreg_db/utils/validate_subcategory_dict.py +91 -0
  226. endoreg_db/utils/video/__init__.py +13 -0
  227. endoreg_db/utils/video/extract_frames.py +121 -0
  228. endoreg_db/utils/video/transcode_videofile.py +111 -0
  229. endoreg_db/views/__init__.py +2 -0
  230. endoreg_db/views/csrf.py +7 -0
  231. endoreg_db/views/patient_views.py +90 -0
  232. endoreg_db/views/raw_video_meta_validation_views.py +38 -0
  233. endoreg_db/views/report_views.py +96 -0
  234. endoreg_db/views/video_segmentation_views.py +149 -0
  235. endoreg_db/views/views_for_timeline.py +46 -0
  236. endoreg_db/views.py +0 -3
  237. endoreg_db-0.6.1.dist-info/METADATA +151 -0
  238. endoreg_db-0.6.1.dist-info/RECORD +420 -0
  239. {endoreg_db-0.5.3.dist-info → endoreg_db-0.6.1.dist-info}/WHEEL +1 -1
  240. endoreg_db/data/active_model/data.yaml +0 -3
  241. endoreg_db/data/label/label-set/data.yaml +0 -18
  242. endoreg_db/management/commands/delete_legacy_images.py +0 -19
  243. endoreg_db/management/commands/delete_legacy_videos.py +0 -17
  244. endoreg_db/management/commands/extract_legacy_video_frames.py +0 -18
  245. endoreg_db/management/commands/import_legacy_images.py +0 -94
  246. endoreg_db/management/commands/import_legacy_videos.py +0 -76
  247. endoreg_db/management/commands/load_label_data.py +0 -67
  248. endoreg_db/migrations/0002_anonymizedimagelabel_anonymousimageannotation_and_more.py +0 -55
  249. endoreg_db/migrations/0003_anonymousimageannotation_original_image_url_and_more.py +0 -39
  250. endoreg_db/migrations/0004_alter_rawpdffile_file.py +0 -20
  251. endoreg_db/migrations/0005_uploadedfile_alter_rawpdffile_file_anonymizedfile.py +0 -40
  252. endoreg_db/migrations/0006_alter_rawpdffile_file.py +0 -20
  253. endoreg_db/migrations/0007_networkdevicelogentry_datetime_and_more.py +0 -43
  254. endoreg_db/migrations/0008_networkdevicelogentry_aglnet_ip_and_more.py +0 -28
  255. endoreg_db/migrations/0009_alter_networkdevicelogentry_vpn_service_status.py +0 -18
  256. endoreg_db/migrations/0010_remove_networkdevicelogentry_hostname.py +0 -17
  257. endoreg_db/models/legacy_data/__init__.py +0 -3
  258. endoreg_db/models/legacy_data/image.py +0 -34
  259. endoreg_db/models/patient_examination/__init__.py +0 -35
  260. endoreg_db/utils/video_metadata.py +0 -87
  261. endoreg_db-0.5.3.dist-info/METADATA +0 -28
  262. endoreg_db-0.5.3.dist-info/RECORD +0 -319
  263. /endoreg_db/{models/persons/patient/case → case_generator}/__init__.py +0 -0
  264. /endoreg_db/data/{label → ai_model_label}/label-type/data.yaml +0 -0
  265. /endoreg_db/data/{model_type → ai_model_type}/data.yaml +0 -0
  266. /endoreg_db/{data/distribution/numeric/.init → management/__init__.py} +0 -0
  267. /endoreg_db/management/commands/{load_report_reader_flag.py → load_report_reader_flag_data.py} +0 -0
  268. {endoreg_db-0.5.3.dist-info → endoreg_db-0.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,77 +1,256 @@
- # import cv2
- from PIL import Image
- from django.core.files.base import ContentFile
+ """ """
+
+ from pathlib import Path
+ from collections import defaultdict, Counter
+ import shutil
+ import os
+ import subprocess
+ from typing import Optional, List, TYPE_CHECKING, Union
  from django.db import models, transaction
+ from icecream import ic
  from tqdm import tqdm
- # import cv2
- import io
- from datetime import date
-
- BATCH_SIZE = 1000
-
- class AbstractVideo(models.Model):
-     file = models.FileField(upload_to="raw_videos", blank=True, null=True)
+ from endoreg_db.utils.hashs import get_video_hash
+ from endoreg_db.utils.file_operations import get_uuid_filename
+ from endoreg_db.utils.ocr import extract_text_from_rois
+
+ from ....utils.video import (
+     transcode_videofile,
+     transcode_videofile_if_required,
+     initialize_frame_objects,
+     extract_frames,
+ )
+
+ from ..metadata import VideoMeta, SensitiveMeta
+ from .utils import (
+     STORAGE_LOCATION,
+     VIDEO_DIR,
+     FRAME_DIR,
+ )
+ from .prepare_bulk_frames import prepare_bulk_frames
+
+ if TYPE_CHECKING:
+     from endoreg_db.models import (
+         VideoPredictionMeta,
+         Video,
+         RawVideoFile,
+         RawVideoPredictionMeta,
+         LabelRawVideoSegment,
+         LabelVideoSegment,
+         ModelMeta,
+         Center,
+         EndoscopyProcessor,
+         VideoMeta,
+         Frame,
+         RawFrame,
+         PatientExamination,
+     ) #
+     from django.db.models import QuerySet
+
+ TEST_RUN = os.environ.get("TEST_RUN", "False")
+ TEST_RUN = TEST_RUN.lower() == "true"
+
+ TEST_RUN_FRAME_NUMBER = int(os.environ.get("TEST_RUN_FRAME_NUMBER", "500"))
+
+ if TEST_RUN:
+     ic("-----\nTEST RUN ENABLED\n-----")
+
+
+ class AbstractVideoFile(models.Model):
+     """
+     Abstract base class for video files.
+     """
+
+     uuid = models.UUIDField()
+
+     sensitive_meta = models.OneToOneField(
+         "SensitiveMeta", on_delete=models.CASCADE, blank=True, null=True
+     )
+
+     center = models.ForeignKey("Center", on_delete=models.CASCADE)
+     processor = models.ForeignKey( # TODO Migrate to VideoMeta
+         "EndoscopyProcessor", on_delete=models.CASCADE, blank=True, null=True
+     )
+     # TODO Reduce redundancies between VideoFile and VideoMeta (e.g. center, processor)
+
+     video_meta = models.OneToOneField(
+         "VideoMeta", on_delete=models.CASCADE, blank=True, null=True
+     )
+     examination = models.ForeignKey(
+         "PatientExamination", on_delete=models.SET_NULL, blank=True, null=True
+     )
+     original_file_name = models.CharField(max_length=255)
      video_hash = models.CharField(max_length=255, unique=True)
-     patient = models.ForeignKey("Patient", on_delete=models.CASCADE, blank=True, null=True)
-     date = models.DateField(blank=True, null=True)
-     suffix = models.CharField(max_length=255)
-     fps = models.FloatField()
-     duration = models.FloatField()
-     width = models.IntegerField()
-     height = models.IntegerField()
-     endoscope_image_x = models.IntegerField(blank=True, null=True)
-     endoscope_image_y = models.IntegerField(blank=True, null=True)
-     endoscope_image_width = models.IntegerField(blank=True, null=True)
-     endoscope_image_height = models.IntegerField(blank=True, null=True)
-     center = models.ForeignKey("Center", on_delete=models.CASCADE, blank=True, null=True)
-     endoscopy_processor = models.ForeignKey("EndoscopyProcessor", on_delete=models.CASCADE, blank=True, null=True)
-     frames_extracted = models.BooleanField(default=False)
-
-     meta = models.JSONField(blank=True, null=True)
+     uploaded_at = models.DateTimeField(auto_now_add=True)
+
+     frame_dir = models.CharField(max_length=255)
+     prediction_dir = models.CharField(max_length=255)
+     predictions = models.JSONField(default=dict)
+     fps = models.FloatField(blank=True, null=True)
+     duration = models.FloatField(blank=True, null=True)
+     frame_count = models.IntegerField(blank=True, null=True)
+
+     readable_predictions = models.JSONField(default=dict)
+     merged_predictions = models.JSONField(default=dict)
+     smooth_merged_predictions = models.JSONField(default=dict)
+     binary_smooth_merged_predictions = models.JSONField(default=dict)
+     sequences = models.JSONField(default=dict)
+     ai_model_meta = models.ForeignKey(
+         "ModelMeta", on_delete=models.CASCADE, blank=True, null=True
+     )
+
+     # Frame Extraction States
+     state_frames_required = models.BooleanField(default=True)
+     state_frames_extracted = models.BooleanField(default=False)
+
+     # Video
+     ## Prediction
+     state_initial_prediction_required = models.BooleanField(default=True)
+     state_initial_prediction_completed = models.BooleanField(default=False)
+     state_initial_prediction_import_required = models.BooleanField(default=True)
+     state_initial_prediction_import_completed = models.BooleanField(default=False)
+
+     # Dataset complete?
+     state_histology_required = models.BooleanField(blank=True, null=True)
+     state_histology_available = models.BooleanField(default=False)
+     state_follow_up_intervention_required = models.BooleanField(blank=True, null=True)
+     state_follow_up_intervention_available = models.BooleanField(default=False)
+     state_dataset_complete = models.BooleanField(default=False)
+
+     state_frames_initialized = models.BooleanField(default=False)
+
+     is_raw = models.BooleanField(default=False)
+
+     if TYPE_CHECKING:
+         self: Union["RawVideoFile", "Video"]
+         label_video_segments: Union[
+             "QuerySet[LabelVideoSegment]",
+             "QuerySet[LabelRawVideoSegment]",
+         ]
+         examination: "PatientExamination"
+         video_meta: "VideoMeta"
+         processor: "EndoscopyProcessor"
+         center: "Center"
+         ai_model_meta: "ModelMeta"
+         sensitive_meta: "SensitiveMeta"
+         frames: Union["QuerySet[RawFrame]", "QuerySet[Frame]"]

      class Meta:
-         abstract = True
-
-     def get_roi_endoscope_image(self):
-         return {
-             'x': self.endoscope_image_content_x,
-             'y': self.endoscope_image_content_y,
-             'width': self.endoscope_image_content_width,
-             'height': self.endoscope_image_content_height,
-         }
+         abstract = True #
+
+     @classmethod
+     def transcode_videofile(cls, filepath: Path, transcoded_path: Path):
+         """ """
+         transcoded_path = transcode_videofile(filepath, transcoded_path)
+         return transcoded_path
+
+     @classmethod
+     def check_hash_exists(cls, video_hash: str):
+         return cls.objects.filter(video_hash=video_hash).exists()
+
+     @classmethod
+     def create_from_file( # TODO Rename to get_or_create_from_file
+         cls,
+         file_path: Path,
+         center_name: str,
+         processor_name: str,
+         frame_dir_parent: Path = FRAME_DIR,
+         video_dir: Path = VIDEO_DIR,
+         save: bool = True,
+         frame_paths: List[dict] = None,
+     ):
+         from endoreg_db.models import Center, EndoscopyProcessor # pylint: disable=import-outside-toplevel
+
+         video_dir.mkdir(parents=True, exist_ok=True)
+         ic(f"Creating {cls} from {file_path}")
+         original_file_name = file_path.name
+
+         transcoded_file_path = transcode_videofile_if_required(file_path)
+         video_hash = get_video_hash(transcoded_file_path)
+
+         vid_with_hash_exists = cls.check_hash_exists(video_hash=video_hash) # pylint: disable=no-member
+         if vid_with_hash_exists:
+             existing: Union["RawVideoFile", "Video"] = cls.objects.get(
+                 video_hash=video_hash
+             )
+             ic(f"Existing DB entry found: {existing}")
+
+             return existing
+
+         _new_file_name, uuid = get_uuid_filename(file_path)
+         ic(f"No existing DB entry found, creating new with UUID {uuid}")
+
+         try:
+             relative_path = transcoded_file_path.relative_to(STORAGE_LOCATION)
+         except ValueError as e:
+             raise Exception(
+                 f"{transcoded_file_path} is outside STORAGE_LOCATION {STORAGE_LOCATION}"
+             ) from e
+
+         video = cls(
+             uuid=uuid,
+             file=relative_path.as_posix(),
+             center=Center.objects.get(name=center_name),
+             processor=EndoscopyProcessor.objects.get(name=processor_name),
+             original_file_name=original_file_name,
+             video_hash=video_hash,
+         )
+         video.save()
+         if frame_paths:
+             ic(f"Initializing frames using provided paths ({len(frame_paths)})")
+             video.initialize_frames(frame_paths)

-     def initialize_metadata_in_db(self, video_meta=None):
-         if not video_meta:
-             video_meta = self.meta
-         self.set_examination_date_from_video_meta(video_meta)
-         self.patient, created = self.get_or_create_patient(video_meta)
-         self.save()
+         # Save the instance to the database
+         video.save()
+         ic(f"Saved {video}")

-     def get_or_create_patient(self, video_meta=None):
-         from ...persons import Patient
-         if not video_meta:
-             video_meta = self.meta
+         return video

-         patient_first_name = video_meta['patient_first_name']
-         patient_last_name = video_meta['patient_last_name']
-         patient_dob = video_meta['patient_dob']
+     def get_video_model(self):
+         from endoreg_db.models import RawVideoFile, Video

-         # assert that we got all the necessary information
-         assert patient_first_name and patient_last_name and patient_dob, "Missing patient information"
+         if self.is_raw:
+             return RawVideoFile
+         return Video

-         patient, created = Patient.objects.get_or_create(
-             first_name=patient_first_name,
-             last_name=patient_last_name,
-             dob=patient_dob
-         )
+     def get_frame_model(self):
+         from endoreg_db.models import RawFrame, Frame

-         return patient, created
+         if self.is_raw_video_file():
+             return RawFrame

-     def get_frame_model(self):
-         assert 1 == 2, "This method should be overridden in derived classes"
+         return Frame

-     def get_video_model(self):
-         assert 1 == 2, "This method should be overridden in derived classes"
+     def get_label_segment_model(self):
+         from endoreg_db.models import LabelVideoSegment, LabelRawVideoSegment # pylint: disable=import-outside-toplevel
+
+         if self.is_raw:
+             return LabelRawVideoSegment
+         return LabelVideoSegment
+
+     def sequences_to_label_video_segments(
+         self,
+         video_prediction_meta: Union["VideoPredictionMeta", "RawVideoPredictionMeta"],
+     ):
+         """
+         Convert sequences to label video segments.
+         """
+         from endoreg_db.models import Label, InformationSource
+
+         label_video_segment_model = self.get_label_segment_model()
+         for label, sequences in self.sequences.items():
+             label = Label.objects.get(name=label)
+             for sequence in sequences:
+                 start_frame_number = sequence[0]
+                 end_frame_number = sequence[1]
+
+                 label_video_segment = label_video_segment_model.objects.create(
+                     video=self,
+                     prediction_meta=video_prediction_meta,
+                     label=label,
+                     start_frame_number=start_frame_number,
+                     end_frame_number=end_frame_number,
+                 )
+                 label_video_segment.save()

      def get_frame_number(self):
          """
@@ -80,17 +259,17 @@ class AbstractVideo(models.Model):
          frame_model = self.get_frame_model()
          framecount = frame_model.objects.filter(video=self).count()
          return framecount
-
-     def set_frames_extracted(self, value:bool=True):
-         self.frames_extracted = value
+
+     def set_frames_extracted(self, value: bool = True):
+         self.state_frames_extracted = value
          self.save()
-
+
      def get_frames(self):
          """
          Retrieve all frames for this video in the correct order.
          """
          frame_model = self.get_frame_model()
-         return frame_model.objects.filter(video=self).order_by('frame_number')
+         return frame_model.objects.filter(video=self).order_by("frame_number")

      def get_frame(self, frame_number):
          """
@@ -99,103 +278,530 @@ class AbstractVideo(models.Model):
          frame_model = self.get_frame_model()
          return frame_model.objects.get(video=self, frame_number=frame_number)

-     def get_frame_range(self, start_frame_number:int, end_frame_number:int):
+     def get_frame_range(self, start_frame_number: int, end_frame_number: int):
          """
          Expects numbers of start and stop frame.
          Returns all frames of this video within the given range in ascending order.
          """
          frame_model = self.get_frame_model()
-         return frame_model.objects.filter(video=self, frame_number__gte=start_frame_number, frame_number__lte=end_frame_number).order_by('frame_number')
+         return frame_model.objects.filter(
+             video=self,
+             frame_number__gte=start_frame_number,
+             frame_number__lte=end_frame_number,
+         ).order_by("frame_number")
+
+     def is_raw_video_file(self):
+         if self.__class__.__name__ == "RawVideoFile":
+             return True
+         return False
+
+     def get_prediction_meta_model(self):
+         from endoreg_db.models import VideoPredictionMeta, RawVideoPredictionMeta
+
+         if self.is_raw_video_file():
+             return RawVideoPredictionMeta
+         return VideoPredictionMeta
+
+     def predict_video(
+         self,
+         model_meta_name: str,
+         dataset_name: str = "inference_dataset",
+         model_meta_version: Optional[int] = None,
+         smooth_window_size_s: int = 1,
+         binarize_threshold: float = 0.5,
+         anonymized_frames: bool = True,
+         img_suffix: str = ".jpg",
+         test_run: bool = TEST_RUN,
+         n_test_frames: int = TEST_RUN_FRAME_NUMBER,
+     ):
+         """
+         WARNING: When using with Video Objects "anonymous_frames" should be set to False
+         Predict the video file using the given model.
+         Frames should be extracted and anonymized frames should be generated before prediction.
+         """
+         from endoreg_db.models import (
+             RawVideoFile,
+             Video,
+             ModelMeta,
+             AiModel,
+         ) # pylint: disable=import-outside-toplevel
+         from endo_ai.predictor.inference_dataset import InferenceDataset # pylint: disable=import-outside-toplevel
+         from endo_ai.predictor.model_loader import MultiLabelClassificationNet
+         from endo_ai.predictor.predict import Classifier
+         from endo_ai.predictor.postprocess import (
+             concat_pred_dicts,
+             make_smooth_preds,
+             find_true_pred_sequences,
+         )

-     def _create_frame_object(self, frame_number, image_file):
-         frame_model = self.get_frame_model()
-         frame = frame_model(
-             video=self,
-             frame_number=frame_number,
-             suffix='jpg',
+         if TEST_RUN:
+             test_run = True
+
+         datasets = {
+             "inference_dataset": InferenceDataset,
+         }
+
+         if isinstance(self, RawVideoFile):
+             self: "RawVideoFile"
+         elif isinstance(self, Video):
+             self: "Video"
+         else:
+             raise Exception("Invalid instance type")
+
+         dataset_model_class = datasets[dataset_name]
+
+         if anonymized_frames and self.is_raw:
+             try:
+                 frame_dir = self.get_anonymized_frame_dir()
+             except: # FIXME
+                 frame_dir = Path(self.frame_dir)
+                 assert self.state_frames_extracted, "Frames not extracted"
+
+         else:
+             frame_dir = Path(self.frame_dir)
+             assert self.state_frames_extracted, "Frames not extracted"
+
+         model_meta = ModelMeta.get_by_name(model_meta_name, model_meta_version)
+         model: AiModel = model_meta.model
+
+         ic(f"Model: {model}, Model Meta: {model_meta}")
+
+         model_type = model.model_type
+         model_subtype = model.model_subtype
+
+         ic(f"Model type: {model_type}, Model subtype: {model_subtype}")
+         ic(f"self: {self}, Type: {type(self)}")
+
+         prediction_meta_model = self.get_prediction_meta_model()
+
+         ic(
+             f"Prediction Meta Model: {prediction_meta_model}, Type: {type(prediction_meta_model)}"
+         )
+
+         video_prediction_meta, _created = prediction_meta_model.objects.get_or_create(
+             video=self, model_meta=model_meta
+         )
+
+         video_prediction_meta.save()
+
+         ic(video_prediction_meta)
+
+         paths = self.get_frame_paths()
+         ic(f"Found {len(paths)} images in {frame_dir}")
+
+         # frame names in format "frame_{index}.jpg"
+         indices = [int(p.stem.split("_")[1]) for p in paths]
+         path_index_tuples = list(zip(paths, indices))
+         # sort ascending by index
+         path_index_tuples.sort(key=lambda x: x[1])
+         paths, indices = zip(*path_index_tuples)
+
+         crop_template = self.get_crop_template()
+
+         string_paths = [p.resolve().as_posix() for p in paths]
+         crops = [crop_template for _ in paths]
+
+         ic(f"Detected {len(paths)} frames")
+
+         if test_run: # only use the first 10 frames
+             ic(f"Running in test mode, using only the first {n_test_frames} frames")
+             paths = paths[:n_test_frames]
+             indices = indices[:n_test_frames]
+             string_paths = string_paths[:n_test_frames]
+             crops = crops[:n_test_frames]
+
+         assert paths, f"No images found in {frame_dir}"
+
+         ds_config = model_meta.get_inference_dataset_config()
+
+         # Create dataset
+         ds = dataset_model_class(string_paths, crops, config=ds_config)
+         ic(f"Dataset length: {len(ds)}")
+
+         # Get a sample image
+         sample = ds[0]
+         ic("Shape:", sample.shape) # e.g., torch.Size([3, 716, 716])
+
+         # unorm = get_unorm(ds_config)
+
+         weights_path = model_meta.weights.path
+
+         ic(f"Model path: {weights_path}")
+
+         # FIXME implement support for different model types
+         ai_model_instance = MultiLabelClassificationNet.load_from_checkpoint(
+             checkpoint_path=weights_path,
+         )
+
+         _ = ai_model_instance.cuda()
+         _ = ai_model_instance.eval()
+         classifier = Classifier(ai_model_instance, verbose=True)
+
+         ic("Starting inference")
+         predictions = classifier.pipe(string_paths, crops)
+
+         ic("Creating Prediction Dict")
+         prediction_dict = classifier.get_prediction_dict(predictions, string_paths)
+         self.predictions = prediction_dict
+
+         ic("Creating Readable Predictions")
+         readable_predictions = [classifier.readable(p) for p in predictions]
+         self.readable_predictions = readable_predictions
+
+         ic("Creating Merged Predictions")
+         merged_predictions = concat_pred_dicts(readable_predictions)
+
+         fps = self.get_fps()
+         ic(
+             f"Creating Smooth Merged Predictions; FPS: {fps}, \
+             Smooth Window Size: {smooth_window_size_s}"
+         )
+
+         smooth_merged_predictions = {}
+         for key in merged_predictions.keys():
+             smooth_merged_predictions[key] = make_smooth_preds(
+                 prediction_array=merged_predictions[key],
+                 window_size_s=smooth_window_size_s,
+                 fps=fps,
              )

-         frame.image_file = image_file # Temporary store the file-like object

-         return frame
+         ic(
+             "Creating Binary Smooth Merged Predictions; Binarize Threshold: ",
+             binarize_threshold,
+         )
+         binary_smooth_merged_predictions = {}
+         for key in smooth_merged_predictions.keys(): # pylint: disable=consider-using-dict-items
+             binary_smooth_merged_predictions[key] = (
+                 smooth_merged_predictions[key] > binarize_threshold
+             )

-     def _bulk_create_frames(self, frames_to_create):
-         frame_model = self.get_frame_model()
-         with transaction.atomic():
-             frame_model.objects.bulk_create(frames_to_create)
-
-             # After the DB operation, save the ImageField for each object
-             for frame in frames_to_create:
-                 frame_name = f"video_{self.id}_frame_{str(frame.frame_number).zfill(7)}.jpg"
-                 frame.image.save(frame_name, frame.image_file)
-
-         # Clear the list for the next batch
-         frames_to_create = []
-
-     def set_examination_date_from_video_meta(self, video_meta=None):
-         if not video_meta:
-             video_meta = self.meta
-         date_str = video_meta['examination_date'] # e.g. 2020-01-01
-         if date_str:
-             self.date = date.fromisoformat(date_str)
+         ic("Creating Sequences")
+         sequences = {}
+         for label, prediction_array in binary_smooth_merged_predictions.items():
+             sequences[label] = find_true_pred_sequences(prediction_array)
+
+         self.sequences = sequences
+
+         self.sequences_to_label_video_segments(
+             video_prediction_meta=video_prediction_meta
+         )
+
+         ic("Finished inference")
+         ic("Saving predictions to DB")
+         ic(sequences)
+         self.state_initial_prediction_required = False
+         self.state_initial_prediction_completed = True
+         self.save()
+
+     def get_outside_segments(self, outside_label_name: str = "outside"):
+         """
+         Get sequences of outside frames.
+         """
+         from endoreg_db.models import Label # pylint: disable=import-outside-toplevel
+
+         outside_label = Label.objects.get(name="outside")
+         assert outside_label is not None
+
+         outside_segments = self.label_video_segments.filter(label=outside_label)
+
+         ic(f"Getting outside sequences using label: {outside_label}")
+
+         return outside_segments
+
+     def get_outside_frames(self) -> List["Frame"]:
+         """
+         Get outside frames.
+         """
+         outside_segments = self.get_outside_segments()
+         frames = []
+         for segment in outside_segments:
+             frames.extend(segment.get_frames())
+         return frames
+
+     def get_outside_frame_paths(self):
+         """
+         Get paths to outside frames.
+         """
+         frames = self.get_outside_frames()
+
+         frame_paths = [Path(frame.image.path) for frame in frames]
+
+         return frame_paths
+
+     def __str__(self):
+         return self.file.name
+
+     def delete_with_file(self):
+         file_path = Path(self.file.path)
+         if file_path.exists():
+             file_path.unlink()
+         self.delete_frames()
+         self.delete_frames_anonymized()
+         self.delete()
+         return f"Deleted {self.file.name}; Deleted frames; Deleted anonymized frames"
+
+     def get_endo_roi(self):
+         """
+         Fetches the endoscope ROI from the video meta.
+         Returns a dictionary with keys "x", "y", "width", "height"
+         """
+         endo_roi = self.video_meta.get_endo_roi()
+         return endo_roi
+
+     def get_crop_template(self):
+         """
+         Creates a crop template (e.g., [0, 1080, 550, 1920 - 20] for a 1080p frame) from the endoscope ROI.
+         """
+         endo_roi = self.get_endo_roi()
+         x = endo_roi["x"]
+         y = endo_roi["y"]
+         width = endo_roi["width"]
+         height = endo_roi["height"]
+
+         crop_template = [y, y + height, x, x + width]
+         return crop_template
+
+     def set_frame_dir(self):
+         self.frame_dir = f"{FRAME_DIR}/{self.uuid}"
+
+     # video meta should be created when video file is created
+     def save(self, *args, **kwargs):
+         assert self.processor is not None, "Processor must be set"
+         if not self.fps:
+             self.fps = self.get_fps()
+         if self.is_raw_video_file():
+             self.is_raw = True
+         if self.video_meta is None:
+             center = self.center
+             processor = self.processor
+             self.video_meta = VideoMeta.objects.create(
+                 center=center, processor=processor
+             )
+             self.video_meta.initialize_ffmpeg_meta(self.file.path)
+
+         if not self.frame_dir:
+             self.set_frame_dir()
+
+         sm = self.sensitive_meta
+         if sm:
+             self.patient = sm.pseudo_patient
+             self.patient_examination = sm.pseudo_examination
+
+         super(AbstractVideoFile, self).save(*args, **kwargs)
+
+     def extract_frames(
+         self,
+         quality: int = 2,
+         overwrite: bool = False,
+         ext="jpg",
+         verbose=False,
+     ) -> List[Path]:
+         """
+         Extract frames from the video file and save them to the frame_dir.
+         For this, ffmpeg must be available in in the current environment.
+         """
+
+         return extract_frames(
+             video=self,
+             quality=quality,
+             overwrite=overwrite,
+             ext=ext,
+             verbose=verbose,
+         )
+
+     def initialize_frames(self, paths: List[Path]):
+         """
+         Initialize frame objects for the video file.
+         """
+         initialize_frame_objects(self, paths)
+
+     def delete_frames(self):
+         """
+         Delete frames extracted from the video file.
+         """
+         frame_dir = Path(self.frame_dir)
+         if frame_dir.exists():
+             shutil.rmtree(frame_dir)
+             self.state_frames_extracted = False
              self.save()
+             return f"Frames deleted from {frame_dir}"
+         else:
+             return f"No frames to delete for {self.file.name}"
+
+     def delete_frames_anonymized(self):
+         """
+         Delete anonymized frames extracted from the video file.
+         """
+         frame_dir = Path(self.frame_dir)
+         anonymized_frame_dir = frame_dir.parent / f"anonymized_{self.uuid}"
+         if anonymized_frame_dir.exists():
+             shutil.rmtree(anonymized_frame_dir)
+             return f"Anonymized frames deleted from {anonymized_frame_dir}"
+         else:
+             return f"No anonymized frames to delete for {self.file.name}"
+
+     def get_frame_path(self, n: int = 0, anonymized=False):
+         """
+         Get the path to the n-th frame extracted from the video file.
+         Note that the frame numbering starts at 1 in our naming convention.
+         """
+         # Adjust index
+         n = n + 1
+
+         if anonymized:
+             _frame_dir = Path(self.frame_dir)
+             frame_dir = _frame_dir.parent / f"anonymized_{_frame_dir.name}"
+         else:
+             frame_dir = Path(self.frame_dir)
+         return frame_dir / f"frame_{n:07d}.jpg"
+
+     def get_frame_paths(self):
+         """
+         Get paths to frames extracted from the video file.
+         """
+         frames = self.frames.filter(extracted=True).order_by("frame_number")
+
+         paths = [Path(frame.image.path) for frame in frames]
+         return paths
+
+     def get_prediction_dir(self):
+         return Path(self.prediction_dir)
+
+     def get_predictions_path(self, suffix=".json"):
+         pred_dir = self.get_prediction_dir()
+         return pred_dir.joinpath("predictions").with_suffix(suffix)
+
+     def get_smooth_predictions_path(self, suffix=".json"):
+         pred_dir = self.get_prediction_dir()
+         return pred_dir.joinpath("smooth_predictions").with_suffix(suffix)

-     def extract_all_frames(self):
+     def get_binary_predictions_path(self, suffix=".json"):
+         pred_dir = self.get_prediction_dir()
+         return pred_dir.joinpath("binary_predictions").with_suffix(suffix)
+
+     def get_raw_sequences_path(self, suffix=".json"):
+         pred_dir = self.get_prediction_dir()
+         return pred_dir.joinpath("raw_sequences").with_suffix(suffix)
+
+     def get_filtered_sequences_path(self, suffix=".json"):
+         pred_dir = self.get_prediction_dir()
+         return pred_dir.joinpath("filtered_sequences").with_suffix(suffix)
+
+     def extract_text_information(self, frame_fraction: float = 0.001):
          """
-         Extract all frames from the video and store them in the database.
-         Uses Django's bulk_create for more efficient database operations.
+         Extract text information from the video file.
+         Makes sure that frames are extracted and then processes the frames.
+         gets all frames from frame_dir and selects a fraction of them to process (at least 1)
          """
-         # Open the video file
-         video = cv2.VideoCapture(self.file.path)
+         if not self.state_frames_extracted:
+             print(f"Frames not extracted for {self.file.name}")
+             return None

-         # Initialize video properties
-         self.initialize_video_specs(video)
+         processor = self.processor

-         # Prepare for batch operation
-         frames_to_create = []
+         frame_paths = self.get_frame_paths()
+         n_frames = len(frame_paths)
+         n_frames_to_process = max(1, int(frame_fraction * n_frames))

-         # Extract frames
-         for frame_number in tqdm(range(int(self.duration * self.fps))):
-             # Read the frame
-             success, image = video.read()
-             if not success:
-                 break
+         # Select evenly spaced frames
+         frame_paths = frame_paths[:: n_frames // n_frames_to_process]

-             # Convert the numpy array to a PIL Image object
-             pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+         # extract text from each frame and store the value to
+         # defaultdict of lists.
+         # Then, extract the most frequent value from each list
+         # Finally, return the dictionary of most frequent values

-             # Save the PIL Image to a buffer
-             buffer = io.BytesIO()
-             pil_image.save(buffer, format='JPEG')
+         # Create a defaultdict to store the extracted text from each ROI
+         rois_texts = defaultdict(list)

-             # Create a file-like object from the byte data in the buffer
-             image_file = ContentFile(buffer.getvalue())
+         print(f"Processing {n_frames_to_process} frames from {self.file.name}")
+         # Process frames
+         for frame_path in frame_paths[:n_frames_to_process]:
+             extracted_texts = extract_text_from_rois(frame_path, processor)
+             for roi, text in extracted_texts.items():
+                 rois_texts[roi].append(text)

-             # Prepare Frame instance (don't save yet)
-             frame = self._create_frame_object(frame_number, image_file)
-             frames_to_create.append(frame)
+         # Get the most frequent text values for each ROI using Counter
+         for key in rois_texts.keys(): # pylint: disable=consider-using-dict-items
+             counter = Counter([text for text in rois_texts[key] if text])
+             rois_texts[key] = counter.most_common(1)[0][0] if counter else None

-             # Perform bulk create when reaching BATCH_SIZE
-             if len(frames_to_create) >= BATCH_SIZE:
-                 self._bulk_create_frames(frames_to_create)
-                 frames_to_create = []
+         return rois_texts

+     def update_text_metadata(self, ocr_frame_fraction=0.001):
+         ic(f"Updating metadata for {self.file.name}")
+         extracted_data_dict = self.extract_text_information(ocr_frame_fraction)
+         if extracted_data_dict is None:
+             ic("No text extracted; skipping metadata update.")
+             return
+         extracted_data_dict["center_name"] = self.center.name

-         # Handle remaining frames
-         if frames_to_create:
-             self._bulk_create_frames(frames_to_create)
-             frames_to_create = []
+         ic(extracted_data_dict)

-         # Close the video file
-         video.release()
-         self.set_frames_extracted(True)
+         extracted_data_dict["center_name"] = self.center.name

+         ic("____________")
+         ic(extracted_data_dict)
+         ic("____________")

-     def initialize_video_specs(self, video):
+         self.sensitive_meta = SensitiveMeta.create_from_dict(extracted_data_dict)
+         self.state_sensitive_data_retrieved = True # pylint: disable=attribute-defined-outside-init
+         self.save()
+
+         # Resulting dict depends on defined ROIs for this processor type!
+
+     def update_video_meta(self):
+         video_meta = self.video_meta
+         video_path = Path(self.file.path)
+         center = self.center
+         assert self.processor
+
+         if video_meta is None:
+             video_meta = VideoMeta.create_from_file(
+                 video_path,
+                 center=center,
+                 processor=self.processor,
+             )
+             self.video_meta = video_meta
+             self.save()
+
+         else:
+             video_meta.update_meta(video_path)
+
+     def get_fps(self):
+         # # FIXME
+         # fps = 50
+         # return fps
+
+         if self.video_meta is None:
+             self.update_video_meta()
+
+         if self.video_meta.ffmpeg_meta is None:
+             self.video_meta.initialize_ffmpeg_meta(self.file.path)
+
+         return self.video_meta.get_fps()
+
+     def create_frame_object(
+         self, frame_number, image_file=None, extracted: bool = False
+     ):
+         """
+         Returns a frame instance with the image_file set.
+         """
+         frame_model = self.get_frame_model()
+         return frame_model(
+             video=self,
+             frame_number=frame_number,
+             image=image_file,
+             extracted=extracted,
+         )
+
+     def bulk_create_frames(self, frames_to_create):
          """
-         Initialize and save video metadata like framerate, dimensions, and duration.
+         Bulk create frames, then save their images to storage.
          """
-         self.fps = video.get(cv2.CAP_PROP_FPS)
-         self.width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-         self.height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         self.duration = video.get(cv2.CAP_PROP_FRAME_COUNT) / self.fps
-         self.save()
+         frame_model = self.get_frame_model()
+         created = frame_model.objects.bulk_create(frames_to_create)
+         # for frame in created:
+         # frame_name = f"frame_{frame.frame_number:07d}.jpg"
+         # frame.image.save(frame_name, frame.image)
+         # frame.save()
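
For orientation, the following minimal sketch shows how the new AbstractVideoFile workflow introduced in 0.6.1 might be driven from application code, based only on the method signatures visible in the diff above. The concrete RawVideoFile subclass, the file path, and the center, processor, and model-meta names are placeholders, and the call sequence is an assumption rather than documented usage.

from pathlib import Path

from endoreg_db.models import RawVideoFile  # concrete subclass assumed; see TYPE_CHECKING imports above

# Get or create a DB entry for the video, keyed by its hash (transcodes first if required).
video = RawVideoFile.create_from_file(
    file_path=Path("/data/raw/example_exam.mp4"),  # placeholder path
    center_name="example_center",                  # placeholder Center name
    processor_name="example_processor",            # placeholder EndoscopyProcessor name
)

# Extract frames (requires ffmpeg), then OCR a small fraction of them to build SensitiveMeta.
video.extract_frames(quality=2)
video.update_text_metadata(ocr_frame_fraction=0.001)

# Run multilabel classification and persist the predicted sequences as label video segments.
video.predict_video(model_meta_name="default_multilabel_classification")  # placeholder ModelMeta name

# Frames predicted as "outside" can then be listed for review.
outside_frame_paths = video.get_outside_frame_paths()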