endoreg-db 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (268) hide show
  1. endoreg_db/admin.py +90 -1
  2. endoreg_db/case_generator/case_generator.py +159 -0
  3. endoreg_db/case_generator/lab_sample_factory.py +33 -0
  4. endoreg_db/case_generator/utils.py +30 -0
  5. endoreg_db/data/__init__.py +50 -4
  6. endoreg_db/data/ai_model/data.yaml +7 -0
  7. endoreg_db/data/{label → ai_model_label}/label/data.yaml +27 -1
  8. endoreg_db/data/ai_model_label/label-set/data.yaml +21 -0
  9. endoreg_db/data/ai_model_meta/default_multilabel_classification.yaml +5 -0
  10. endoreg_db/data/ai_model_video_segmentation_label/base_segmentation.yaml +176 -0
  11. endoreg_db/data/ai_model_video_segmentation_labelset/data.yaml +20 -0
  12. endoreg_db/data/center/data.yaml +35 -5
  13. endoreg_db/data/contraindication/bleeding.yaml +11 -0
  14. endoreg_db/data/distribution/numeric/data.yaml +14 -0
  15. endoreg_db/data/endoscope/data.yaml +93 -0
  16. endoreg_db/data/examination_indication/endoscopy.yaml +8 -0
  17. endoreg_db/data/examination_indication_classification/endoscopy.yaml +8 -0
  18. endoreg_db/data/examination_indication_classification_choice/endoscopy.yaml +101 -0
  19. endoreg_db/data/finding/data.yaml +141 -0
  20. endoreg_db/data/finding_intervention/endoscopy.yaml +138 -0
  21. endoreg_db/data/finding_intervention_type/endoscopy.yaml +15 -0
  22. endoreg_db/data/finding_location_classification/colonoscopy.yaml +46 -0
  23. endoreg_db/data/finding_location_classification_choice/colonoscopy.yaml +240 -0
  24. endoreg_db/data/finding_morphology_classification/colonoscopy.yaml +48 -0
  25. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_circularity_default.yaml +34 -0
  26. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_nice.yaml +20 -0
  27. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_paris.yaml +65 -0
  28. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_planarity_default.yaml +56 -0
  29. endoreg_db/data/finding_morphology_classification_choice/colon_lesion_surface_intact_default.yaml +39 -0
  30. endoreg_db/data/finding_morphology_classification_choice/colonoscopy_size.yaml +57 -0
  31. endoreg_db/data/finding_morphology_classification_type/colonoscopy.yaml +79 -0
  32. endoreg_db/data/finding_type/data.yaml +30 -0
  33. endoreg_db/data/gender/data.yaml +17 -0
  34. endoreg_db/data/lab_value/cardiac_enzymes.yaml +7 -1
  35. endoreg_db/data/lab_value/coagulation.yaml +6 -1
  36. endoreg_db/data/lab_value/electrolytes.yaml +39 -1
  37. endoreg_db/data/lab_value/gastrointestinal_function.yaml +12 -0
  38. endoreg_db/data/lab_value/hematology.yaml +17 -2
  39. endoreg_db/data/lab_value/hormones.yaml +6 -0
  40. endoreg_db/data/lab_value/lipids.yaml +12 -3
  41. endoreg_db/data/lab_value/misc.yaml +5 -2
  42. endoreg_db/data/lab_value/renal_function.yaml +2 -1
  43. endoreg_db/data/lx_client_tag/base.yaml +54 -0
  44. endoreg_db/data/lx_client_type/base.yaml +30 -0
  45. endoreg_db/data/lx_permission/base.yaml +24 -0
  46. endoreg_db/data/lx_permission/endoreg.yaml +52 -0
  47. endoreg_db/data/medication_indication/anticoagulation.yaml +44 -49
  48. endoreg_db/data/names_first/first_names.yaml +51 -0
  49. endoreg_db/data/names_last/last_names.yaml +51 -0
  50. endoreg_db/data/network_device/data.yaml +30 -0
  51. endoreg_db/data/organ/data.yaml +29 -0
  52. endoreg_db/data/pdf_type/data.yaml +2 -1
  53. endoreg_db/data/report_reader_flag/ukw-examination-generic.yaml +4 -0
  54. endoreg_db/forms/__init__.py +3 -1
  55. endoreg_db/forms/examination_form.py +11 -0
  56. endoreg_db/forms/patient_finding_intervention_form.py +19 -0
  57. endoreg_db/forms/patient_form.py +26 -0
  58. endoreg_db/management/commands/__init__.py +0 -0
  59. endoreg_db/management/commands/load_ai_model_data.py +57 -23
  60. endoreg_db/management/commands/load_ai_model_label_data.py +59 -0
  61. endoreg_db/management/commands/load_base_db_data.py +160 -118
  62. endoreg_db/management/commands/{load_endoscope_type_data.py → load_contraindication_data.py} +3 -7
  63. endoreg_db/management/commands/load_disease_data.py +29 -7
  64. endoreg_db/management/commands/load_endoscope_data.py +68 -0
  65. endoreg_db/management/commands/load_examination_indication_data.py +65 -0
  66. endoreg_db/management/commands/load_finding_data.py +171 -0
  67. endoreg_db/management/commands/load_lab_value_data.py +3 -3
  68. endoreg_db/management/commands/load_lx_data.py +64 -0
  69. endoreg_db/management/commands/load_medication_data.py +83 -21
  70. endoreg_db/management/commands/load_name_data.py +37 -0
  71. endoreg_db/management/commands/{load_endoscopy_processor_data.py → load_organ_data.py} +7 -9
  72. endoreg_db/migrations/0001_initial.py +1206 -728
  73. endoreg_db/migrations/0002_alter_frame_image_alter_rawframe_image.py +23 -0
  74. endoreg_db/migrations/0003_alter_frame_image_alter_rawframe_image.py +23 -0
  75. endoreg_db/migrations/0004_alter_rawvideofile_file_alter_video_file.py +25 -0
  76. endoreg_db/migrations/0005_rawvideofile_frame_count_and_more.py +33 -0
  77. endoreg_db/migrations/0006_frame_extracted_rawframe_extracted.py +23 -0
  78. endoreg_db/migrations/0007_rename_pseudo_patient_video_patient_and_more.py +24 -0
  79. endoreg_db/migrations/0008_remove_reportfile_patient_examination_and_more.py +48 -0
  80. endoreg_db/models/__init__.py +331 -28
  81. endoreg_db/models/ai_model/__init__.py +1 -0
  82. endoreg_db/models/ai_model/ai_model.py +103 -0
  83. endoreg_db/models/ai_model/lightning/__init__.py +3 -0
  84. endoreg_db/models/ai_model/lightning/inference_dataset.py +53 -0
  85. endoreg_db/models/ai_model/lightning/multilabel_classification_net.py +155 -0
  86. endoreg_db/models/ai_model/lightning/postprocess.py +53 -0
  87. endoreg_db/models/ai_model/lightning/predict.py +172 -0
  88. endoreg_db/models/ai_model/lightning/prediction_visualizer.py +55 -0
  89. endoreg_db/models/ai_model/lightning/preprocess.py +68 -0
  90. endoreg_db/models/ai_model/lightning/run_visualizer.py +21 -0
  91. endoreg_db/models/ai_model/model_meta.py +232 -6
  92. endoreg_db/models/ai_model/model_type.py +13 -3
  93. endoreg_db/models/annotation/__init__.py +31 -2
  94. endoreg_db/models/annotation/anonymized_image_annotation.py +73 -18
  95. endoreg_db/models/annotation/binary_classification_annotation_task.py +94 -57
  96. endoreg_db/models/annotation/image_classification.py +73 -14
  97. endoreg_db/models/annotation/video_segmentation_annotation.py +52 -0
  98. endoreg_db/models/annotation/video_segmentation_labelset.py +20 -0
  99. endoreg_db/models/case/__init__.py +1 -0
  100. endoreg_db/models/{persons/patient/case → case}/case.py +4 -0
  101. endoreg_db/models/case_template/__init__.py +10 -1
  102. endoreg_db/models/case_template/case_template.py +57 -13
  103. endoreg_db/models/case_template/case_template_rule.py +5 -5
  104. endoreg_db/models/case_template/case_template_rule_value.py +19 -4
  105. endoreg_db/models/center/__init__.py +7 -0
  106. endoreg_db/models/center/center.py +31 -5
  107. endoreg_db/models/center/center_product.py +0 -1
  108. endoreg_db/models/center/center_resource.py +16 -2
  109. endoreg_db/models/center/center_waste.py +6 -1
  110. endoreg_db/models/contraindication/__init__.py +21 -0
  111. endoreg_db/models/data_file/__init__.py +38 -5
  112. endoreg_db/models/data_file/base_classes/__init__.py +6 -1
  113. endoreg_db/models/data_file/base_classes/abstract_frame.py +64 -15
  114. endoreg_db/models/data_file/base_classes/abstract_pdf.py +136 -0
  115. endoreg_db/models/data_file/base_classes/abstract_video.py +744 -138
  116. endoreg_db/models/data_file/base_classes/frame_helpers.py +17 -0
  117. endoreg_db/models/data_file/base_classes/prepare_bulk_frames.py +19 -0
  118. endoreg_db/models/data_file/base_classes/utils.py +80 -0
  119. endoreg_db/models/data_file/frame.py +22 -38
  120. endoreg_db/models/data_file/import_classes/__init__.py +4 -18
  121. endoreg_db/models/data_file/import_classes/raw_pdf.py +162 -90
  122. endoreg_db/models/data_file/import_classes/raw_video.py +239 -294
  123. endoreg_db/models/data_file/metadata/__init__.py +10 -0
  124. endoreg_db/models/data_file/metadata/pdf_meta.py +4 -0
  125. endoreg_db/models/data_file/metadata/sensitive_meta.py +265 -6
  126. endoreg_db/models/data_file/metadata/video_meta.py +116 -50
  127. endoreg_db/models/data_file/report_file.py +30 -63
  128. endoreg_db/models/data_file/video/__init__.py +6 -2
  129. endoreg_db/models/data_file/video/video.py +187 -16
  130. endoreg_db/models/data_file/video_segment.py +162 -55
  131. endoreg_db/models/disease.py +25 -2
  132. endoreg_db/models/emission/__init__.py +5 -1
  133. endoreg_db/models/emission/emission_factor.py +71 -6
  134. endoreg_db/models/event.py +51 -0
  135. endoreg_db/models/examination/__init__.py +6 -1
  136. endoreg_db/models/examination/examination.py +53 -12
  137. endoreg_db/models/examination/examination_indication.py +170 -0
  138. endoreg_db/models/examination/examination_time.py +31 -5
  139. endoreg_db/models/examination/examination_time_type.py +28 -4
  140. endoreg_db/models/examination/examination_type.py +28 -6
  141. endoreg_db/models/finding/__init__.py +11 -0
  142. endoreg_db/models/finding/finding.py +75 -0
  143. endoreg_db/models/finding/finding_intervention.py +60 -0
  144. endoreg_db/models/finding/finding_location_classification.py +94 -0
  145. endoreg_db/models/finding/finding_morphology_classification.py +89 -0
  146. endoreg_db/models/finding/finding_type.py +22 -0
  147. endoreg_db/models/hardware/endoscope.py +16 -0
  148. endoreg_db/models/hardware/endoscopy_processor.py +31 -19
  149. endoreg_db/models/label/label.py +35 -7
  150. endoreg_db/models/laboratory/lab_value.py +12 -3
  151. endoreg_db/models/logging/__init__.py +8 -1
  152. endoreg_db/models/lx/__init__.py +4 -0
  153. endoreg_db/models/lx/client.py +57 -0
  154. endoreg_db/models/lx/identity.py +34 -0
  155. endoreg_db/models/lx/permission.py +18 -0
  156. endoreg_db/models/lx/user.py +16 -0
  157. endoreg_db/models/medication/__init__.py +19 -1
  158. endoreg_db/models/medication/medication.py +7 -122
  159. endoreg_db/models/medication/medication_indication.py +50 -0
  160. endoreg_db/models/medication/medication_indication_type.py +34 -0
  161. endoreg_db/models/medication/medication_intake_time.py +26 -0
  162. endoreg_db/models/medication/medication_schedule.py +37 -0
  163. endoreg_db/models/network/__init__.py +7 -1
  164. endoreg_db/models/network/network_device.py +13 -8
  165. endoreg_db/models/organ/__init__.py +38 -0
  166. endoreg_db/models/other/__init__.py +19 -1
  167. endoreg_db/models/other/distribution/__init__.py +44 -0
  168. endoreg_db/models/other/distribution/base_value_distribution.py +20 -0
  169. endoreg_db/models/other/distribution/date_value_distribution.py +91 -0
  170. endoreg_db/models/other/distribution/multiple_categorical_value_distribution.py +32 -0
  171. endoreg_db/models/other/distribution/numeric_value_distribution.py +97 -0
  172. endoreg_db/models/other/distribution/single_categorical_value_distribution.py +22 -0
  173. endoreg_db/models/other/distribution.py +1 -211
  174. endoreg_db/models/other/material.py +4 -0
  175. endoreg_db/models/other/transport_route.py +2 -1
  176. endoreg_db/models/patient/__init__.py +24 -0
  177. endoreg_db/models/patient/patient_examination.py +182 -0
  178. endoreg_db/models/patient/patient_finding.py +143 -0
  179. endoreg_db/models/patient/patient_finding_intervention.py +26 -0
  180. endoreg_db/models/patient/patient_finding_location.py +120 -0
  181. endoreg_db/models/patient/patient_finding_morphology.py +166 -0
  182. endoreg_db/models/persons/__init__.py +29 -2
  183. endoreg_db/models/persons/examiner/examiner.py +48 -4
  184. endoreg_db/models/persons/patient/__init__.py +1 -1
  185. endoreg_db/models/persons/patient/patient.py +227 -54
  186. endoreg_db/models/persons/patient/patient_disease.py +6 -0
  187. endoreg_db/models/persons/patient/patient_event.py +31 -1
  188. endoreg_db/models/persons/patient/patient_examination_indication.py +32 -0
  189. endoreg_db/models/persons/patient/patient_lab_sample.py +4 -2
  190. endoreg_db/models/persons/patient/patient_lab_value.py +37 -16
  191. endoreg_db/models/persons/patient/patient_medication.py +27 -12
  192. endoreg_db/models/persons/patient/patient_medication_schedule.py +62 -2
  193. endoreg_db/models/prediction/__init__.py +7 -1
  194. endoreg_db/models/prediction/image_classification.py +20 -6
  195. endoreg_db/models/prediction/video_prediction_meta.py +151 -89
  196. endoreg_db/models/product/__init__.py +10 -1
  197. endoreg_db/models/product/product.py +15 -2
  198. endoreg_db/models/product/product_group.py +8 -0
  199. endoreg_db/models/product/product_material.py +4 -0
  200. endoreg_db/models/product/product_weight.py +12 -0
  201. endoreg_db/models/product/reference_product.py +19 -3
  202. endoreg_db/models/quiz/__init__.py +8 -1
  203. endoreg_db/models/report_reader/__init__.py +6 -1
  204. endoreg_db/serializers/__init__.py +1 -1
  205. endoreg_db/serializers/annotation.py +2 -5
  206. endoreg_db/serializers/frame.py +1 -5
  207. endoreg_db/serializers/patient.py +26 -3
  208. endoreg_db/serializers/prediction.py +2 -7
  209. endoreg_db/serializers/raw_video_meta_validation.py +13 -0
  210. endoreg_db/serializers/video.py +6 -13
  211. endoreg_db/serializers/video_segmentation.py +492 -0
  212. endoreg_db/templates/admin/patient_finding_intervention.html +253 -0
  213. endoreg_db/templates/admin/start_examination.html +12 -0
  214. endoreg_db/templates/timeline.html +176 -0
  215. endoreg_db/urls.py +173 -0
  216. endoreg_db/utils/__init__.py +36 -1
  217. endoreg_db/utils/dataloader.py +45 -19
  218. endoreg_db/utils/dates.py +39 -0
  219. endoreg_db/utils/hashs.py +122 -4
  220. endoreg_db/utils/names.py +74 -0
  221. endoreg_db/utils/parse_and_generate_yaml.py +46 -0
  222. endoreg_db/utils/pydantic_models/__init__.py +6 -0
  223. endoreg_db/utils/pydantic_models/db_config.py +57 -0
  224. endoreg_db/utils/validate_endo_roi.py +19 -0
  225. endoreg_db/utils/validate_subcategory_dict.py +91 -0
  226. endoreg_db/utils/video/__init__.py +13 -0
  227. endoreg_db/utils/video/extract_frames.py +121 -0
  228. endoreg_db/utils/video/transcode_videofile.py +111 -0
  229. endoreg_db/views/__init__.py +2 -0
  230. endoreg_db/views/csrf.py +7 -0
  231. endoreg_db/views/patient_views.py +90 -0
  232. endoreg_db/views/raw_video_meta_validation_views.py +38 -0
  233. endoreg_db/views/report_views.py +96 -0
  234. endoreg_db/views/video_segmentation_views.py +149 -0
  235. endoreg_db/views/views_for_timeline.py +46 -0
  236. endoreg_db/views.py +0 -3
  237. endoreg_db-0.6.1.dist-info/METADATA +151 -0
  238. endoreg_db-0.6.1.dist-info/RECORD +420 -0
  239. {endoreg_db-0.5.3.dist-info → endoreg_db-0.6.1.dist-info}/WHEEL +1 -1
  240. endoreg_db/data/active_model/data.yaml +0 -3
  241. endoreg_db/data/label/label-set/data.yaml +0 -18
  242. endoreg_db/management/commands/delete_legacy_images.py +0 -19
  243. endoreg_db/management/commands/delete_legacy_videos.py +0 -17
  244. endoreg_db/management/commands/extract_legacy_video_frames.py +0 -18
  245. endoreg_db/management/commands/import_legacy_images.py +0 -94
  246. endoreg_db/management/commands/import_legacy_videos.py +0 -76
  247. endoreg_db/management/commands/load_label_data.py +0 -67
  248. endoreg_db/migrations/0002_anonymizedimagelabel_anonymousimageannotation_and_more.py +0 -55
  249. endoreg_db/migrations/0003_anonymousimageannotation_original_image_url_and_more.py +0 -39
  250. endoreg_db/migrations/0004_alter_rawpdffile_file.py +0 -20
  251. endoreg_db/migrations/0005_uploadedfile_alter_rawpdffile_file_anonymizedfile.py +0 -40
  252. endoreg_db/migrations/0006_alter_rawpdffile_file.py +0 -20
  253. endoreg_db/migrations/0007_networkdevicelogentry_datetime_and_more.py +0 -43
  254. endoreg_db/migrations/0008_networkdevicelogentry_aglnet_ip_and_more.py +0 -28
  255. endoreg_db/migrations/0009_alter_networkdevicelogentry_vpn_service_status.py +0 -18
  256. endoreg_db/migrations/0010_remove_networkdevicelogentry_hostname.py +0 -17
  257. endoreg_db/models/legacy_data/__init__.py +0 -3
  258. endoreg_db/models/legacy_data/image.py +0 -34
  259. endoreg_db/models/patient_examination/__init__.py +0 -35
  260. endoreg_db/utils/video_metadata.py +0 -87
  261. endoreg_db-0.5.3.dist-info/METADATA +0 -28
  262. endoreg_db-0.5.3.dist-info/RECORD +0 -319
  263. /endoreg_db/{models/persons/patient/case → case_generator}/__init__.py +0 -0
  264. /endoreg_db/data/{label → ai_model_label}/label-type/data.yaml +0 -0
  265. /endoreg_db/data/{model_type → ai_model_type}/data.yaml +0 -0
  266. /endoreg_db/{data/distribution/numeric/.init → management/__init__.py} +0 -0
  267. /endoreg_db/management/commands/{load_report_reader_flag.py → load_report_reader_flag_data.py} +0 -0
  268. {endoreg_db-0.5.3.dist-info → endoreg_db-0.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,17 @@
1
+ from django.core.files import File
2
+ import io
3
+ from pathlib import Path
4
+ from typing import List
5
+
6
+
7
def prepare_bulk_frames(frame_paths: List[Path]):
    """
    Lazily convert frame image paths into ``(frame_number, File)`` pairs.

    Every image is read completely and wrapped in an in-memory ``BytesIO``,
    so the yielded Django ``File`` never refers to an already closed file
    handle (this is what avoids 'seek of closed file' errors).

    The frame number is the integer in the second "_"-separated token of
    the file stem; it is parsed before the file is opened.
    """
    for frame_path in frame_paths:
        stem_tokens = frame_path.stem.split("_")
        number = int(stem_tokens[1])

        with open(frame_path, "rb") as handle:
            buffer = io.BytesIO(handle.read())

        yield number, File(buffer, name=frame_path.name)
@@ -0,0 +1,19 @@
1
+ from django.core.files import File
2
+
3
+
4
+ import io
5
+ from pathlib import Path
6
+ from typing import List
7
+
8
+
9
def prepare_bulk_frames(frame_paths: List[Path]):
    """
    Generate ``(frame_number, File)`` tuples for the given frame paths.

    The bytes of each frame are loaded into memory and handed to Django's
    ``File`` through a ``BytesIO`` object; because no real file handle is
    kept, 'seek of closed file' errors cannot occur later on.

    The frame number is taken from the file stem: the stem is split on "_"
    and the second piece is converted to ``int`` before the file is read.
    """
    for source in frame_paths:
        index = int(source.stem.split("_")[1])

        with open(source, "rb") as stream:
            raw_bytes = stream.read()

        in_memory_file = File(io.BytesIO(raw_bytes), name=source.name)
        yield index, in_memory_file
@@ -0,0 +1,80 @@
1
+ """
2
+ Utility functions for data file classes.
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+ import cv2
8
+ import numpy as np
9
+
10
def _env_flag(name: str, default: bool = False) -> bool:
    """Return environment variable *name* interpreted as a boolean.

    ``os.environ`` only ever yields strings, so values such as ``"False"``
    or ``"0"`` would be truthy if used directly. Unset variables give
    *default*; explicit negative spellings (and the empty string) give
    ``False``; every other value gives ``True``.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in {"", "0", "false", "no", "off", "n", "f"}


# NOTE(review): the fallback salt is a fixed, publicly visible value;
# deployments should presumably set DJANGO_NAME_SALT explicitly - confirm.
DJANGO_NAME_SALT = os.environ.get("DJANGO_NAME_SALT", "default_salt")

# Directory stuff
# Root storage directory; overridable via DJANGO_PSEUDO_DIR.
PSEUDO_DIR = Path(os.environ.get("DJANGO_PSEUDO_DIR", Path("./erc_data")))
STORAGE_LOCATION = PSEUDO_DIR
FRAME_DIR_NAME = os.environ.get("DJANGO_FRAME_DIR_NAME", "db_frames")
RAW_FRAME_DIR_NAME = os.environ.get("DJANGO_RAW_FRAME_DIR_NAME", "db_raw_frames")
VIDEO_DIR_NAME = os.environ.get("DJANGO_VIDEO_DIR_NAME", "db_videos")
RAW_VIDEO_DIR_NAME = os.environ.get("DJANGO_RAW_VIDEO_DIR_NAME", "db_raw_videos")

FRAME_DIR = STORAGE_LOCATION / FRAME_DIR_NAME
VIDEO_DIR = STORAGE_LOCATION / VIDEO_DIR_NAME
RAW_VIDEO_DIR = STORAGE_LOCATION / RAW_VIDEO_DIR_NAME

# Parsed explicitly so the constants have the same type (bool / int) whether
# they come from the environment or from the defaults below.
TEST_RUN = _env_flag("TEST_RUN", False)
TEST_RUN_FRAME_NUMBER = int(os.environ.get("TEST_RUN_FRAME_NUMBER", 1000))

# Import-time side effect: the video directories are created if missing.
VIDEO_DIR.mkdir(parents=True, exist_ok=True)
RAW_VIDEO_DIR.mkdir(parents=True, exist_ok=True)

# AI Stuff
FRAME_PROCESSING_BATCH_SIZE = int(
    os.environ.get("DJANGO_FRAME_PROCESSING_BATCH_SIZE", 10)
)
32
+
33
+
34
def anonymize_frame(
    raw_frame_path: Path, target_frame_path: Path, endo_roi, all_black: bool = False
):
    """
    Write an anonymized copy of a frame: every pixel outside the endoscope
    ROI is blacked out.

    Args:
        raw_frame_path (Path): Image file to read.
        target_frame_path (Path): Path the anonymized image is written to.
        endo_roi: Mapping with keys "x", "y", "width" and "height". The
            values are used as slice bounds, so they must be integers.
            Only read when ``all_black`` is False.
        all_black (bool): If True, the ROI is not copied and a completely
            black image of the same size is written.

    Returns:
        The frame exactly as read from ``raw_frame_path`` - NOT the
        anonymized image that was written to ``target_frame_path``.

    Raises:
        ValueError: If the image at ``raw_frame_path`` cannot be read.
    """

    frame = cv2.imread(raw_frame_path.as_posix())  # pylint: disable=no-member
    if frame is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than further down.
        raise ValueError(f"Could not read frame image: {raw_frame_path}")

    # make black frame with same size as original frame
    new_frame = np.zeros_like(frame)

    if not all_black:
        # endo_roi is dict with keys "x", "y", "width", "height"
        x = endo_roi["x"]
        y = endo_roi["y"]
        width = endo_roi["width"]
        height = endo_roi["height"]

        # copy endoscope roi to black frame
        rows = slice(y, y + height)
        cols = slice(x, x + width)
        new_frame[rows, cols] = frame[rows, cols]

    cv2.imwrite(target_frame_path.as_posix(), new_frame)  # pylint: disable=no-member

    return frame
+
59
+
60
def copy_with_progress(src: str, dst: str, buffer_size=1024 * 1024):
    """
    Copy a file chunk by chunk and print the percentage copied so far.

    After every chunk the progress is printed on the same console line
    (carriage return, no trailing newline).

    Args:
        src (str): Path of the file to copy.
        dst (str): Path of the copy to create.
        buffer_size (int): Number of bytes read per chunk.
    """
    bytes_total = os.path.getsize(src)
    bytes_done = 0

    with open(src, "rb") as reader, open(dst, "wb") as writer:
        # iter() with a sentinel ends the loop on the first empty read (EOF)
        for chunk in iter(lambda: reader.read(buffer_size), b""):
            writer.write(chunk)
            bytes_done += len(chunk)
            percent = bytes_done / bytes_total * 100
            print(f"\rProgress: {percent:.2f}%", end="")
@@ -1,45 +1,29 @@
1
- from endoreg_db.models.annotation.image_classification import ImageClassificationAnnotation
1
+ from typing import TYPE_CHECKING
2
+
2
3
  from endoreg_db.models.label.label import Label
3
4
  from .base_classes import AbstractFrame
4
5
  from django.db import models
5
6
 
7
+ from .base_classes.utils import FRAME_DIR_NAME, RAW_FRAME_DIR_NAME
8
+
9
+ if TYPE_CHECKING:
10
+ from endoreg_db.models import RawVideoFile, Video
11
+
12
+
13
+ class RawFrame(AbstractFrame):
14
+ image = models.ImageField(upload_to=RAW_FRAME_DIR_NAME, blank=True, null=True)
15
+ video = models.ForeignKey(
16
+ "RawVideoFile", on_delete=models.CASCADE, related_name="frames"
17
+ )
18
+
19
+ if TYPE_CHECKING:
20
+ video: "RawVideoFile"
21
+ label: "Label"
22
+
23
+
6
24
  class Frame(AbstractFrame):
25
+ image = models.ImageField(upload_to=FRAME_DIR_NAME, blank=True, null=True)
7
26
  video = models.ForeignKey("Video", on_delete=models.CASCADE, related_name="frames")
8
27
 
9
- class LegacyFrame(AbstractFrame):
10
- video = models.ForeignKey("LegacyVideo", on_delete=models.CASCADE, related_name='frames')
11
- image = models.ImageField(upload_to="legacy_frames", blank=True, null=True)
12
- suffix = models.CharField(max_length=255)
13
- # ImageClassificationAnnotation has a foreign key to this model (related name: image_classification_annotations)
14
-
15
- class Meta:
16
- unique_together = ('video', 'frame_number')
17
- indexes = [
18
- models.Index(fields=['video', 'frame_number']),
19
- ]
20
-
21
- def get_classification_annotations(self):
22
- """
23
- Get all image classification annotations for this frame.
24
- """
25
- return ImageClassificationAnnotation.objects.filter(legacy_frame=self)
26
-
27
- def get_classification_annotations_by_label(self, label:Label):
28
- """
29
- Get all image classification annotations for this frame with the given label.
30
- """
31
- return ImageClassificationAnnotation.objects.filter(legacy_frame=self, label=label)
32
-
33
- def get_classification_annotations_by_value(self, value:bool):
34
- """
35
- Get all image classification annotations for this frame with the given value.
36
- """
37
- return ImageClassificationAnnotation.objects.filter(legacy_frame=self, value=value)
38
-
39
- def get_classification_annotations_by_label_and_value(self, label:Label, value:bool):
40
- """
41
- Get all image classification annotations for this frame with the given label and value.
42
- """
43
- return ImageClassificationAnnotation.objects.filter(legacy_frame=self, label=label, value=value)
44
-
45
-
28
+ if TYPE_CHECKING:
29
+ video: "Video"
@@ -1,6 +1,3 @@
1
- import os
2
- from pathlib import Path
3
-
4
1
  from .raw_video import RawVideoFile
5
2
  from .raw_pdf import RawPdfFile
6
3
 
@@ -14,19 +11,8 @@ from .raw_pdf import RawPdfFile
14
11
  # main method is import_files which expects a path to a directory containing files to import.
15
12
  # creates correct import file object depending on file type by checking the file extension
16
13
 
17
- class FileImporter:
18
- def __init__(self, directory):
19
- self.directory = directory
20
14
 
21
- def import_files(self):
22
- directory_path = Path(self.directory)
23
- for file in directory_path.iterdir():
24
- if file.is_file():
25
- if file.suffix.lower() in ['.mov', '.mp4']:
26
- RawVideoFile.create_from_file(file)
27
- else:
28
- raise ValueError(f"File type {file.suffix} not supported")
29
- else:
30
- raise ValueError(f"{file} is not a file")
31
-
32
-
15
+ __all__ = [
16
+ "RawPdfFile",
17
+ "RawVideoFile",
18
+ ]
@@ -7,182 +7,254 @@
7
7
 
8
8
  from django.db import models
9
9
  from django.core.files.storage import FileSystemStorage
10
- from django.core.files import File
11
10
  from django.conf import settings
12
- from django.utils import timezone
13
11
  from django.core.exceptions import ValidationError
14
12
  from django.core.validators import FileExtensionValidator
15
13
  from endoreg_db.utils.file_operations import get_uuid_filename
14
+ from icecream import ic
16
15
 
17
16
  from agl_report_reader.report_reader import ReportReader
18
17
 
19
18
  from endoreg_db.utils.hashs import get_pdf_hash
20
19
  from ..metadata import SensitiveMeta
20
+ from ..base_classes.abstract_pdf import AbstractPdfFile
21
21
 
22
22
  # setup logging to pdf_import.log
23
23
  import logging
24
- logger = logging.getLogger('pdf_import')
25
24
 
26
25
  import shutil
26
+ from pathlib import Path
27
27
 
28
- # get pdf location from settings, default to ~/erc_data/raw_pdf and create if not exists
29
- PSEUDO_DIR_RAW_PDF = getattr(settings, 'PSEUDO_DIR_RAW_PDF', settings.BASE_DIR / 'erc_data/raw_pdf')
28
+ from ..base_classes.utils import (
29
+ STORAGE_LOCATION,
30
+ )
30
31
 
31
- class RawPdfFile(models.Model):
32
+ logger = logging.getLogger("pdf_import")
33
+
34
+ RAW_PDF_DIR_NAME = "raw_pdf"
35
+ RAW_PDF_DIR = STORAGE_LOCATION / RAW_PDF_DIR_NAME
36
+
37
+ if not RAW_PDF_DIR.exists():
38
+ RAW_PDF_DIR.mkdir(parents=True)
39
+
40
+
41
+ class RawPdfFile(AbstractPdfFile):
32
42
  file = models.FileField(
33
- upload_to='raw_pdf/',
34
- validators=[FileExtensionValidator(allowed_extensions=['pdf'])],
35
- storage=FileSystemStorage(location=PSEUDO_DIR_RAW_PDF.resolve().as_posix()),
43
+ upload_to=f"{RAW_PDF_DIR_NAME}/",
44
+ validators=[FileExtensionValidator(allowed_extensions=["pdf"])],
45
+ storage=FileSystemStorage(location=STORAGE_LOCATION.resolve().as_posix()),
36
46
  )
37
47
 
38
- pdf_hash = models.CharField(max_length=255, unique=True)
39
- pdf_type = models.ForeignKey('PdfType', on_delete=models.CASCADE)
40
- center = models.ForeignKey('Center', on_delete=models.CASCADE)
48
+ patient = models.ForeignKey(
49
+ "Patient",
50
+ on_delete=models.SET_NULL,
51
+ blank=True,
52
+ null=True,
53
+ related_name="raw_pdf_files",
54
+ )
41
55
 
42
- state_report_processing_required = models.BooleanField(default = True)
56
+ state_report_processing_required = models.BooleanField(default=True)
43
57
  state_report_processed = models.BooleanField(default=False)
44
-
58
+ raw_meta = models.JSONField(blank=True, null=True)
45
59
  # report_file = models.OneToOneField("ReportFile", on_delete=models.CASCADE, null=True, blank=True)
46
- sensitive_meta = models.OneToOneField(
47
- 'SensitiveMeta',
48
- on_delete=models.CASCADE,
49
- related_name='raw_pdf_file',
60
+ sensitive_meta = models.ForeignKey(
61
+ "SensitiveMeta",
62
+ on_delete=models.SET_NULL,
63
+ related_name="raw_pdf_files",
50
64
  null=True,
51
65
  blank=True,
52
66
  )
53
67
 
54
- text = models.TextField(blank=True, null=True)
55
- anonymized_text = models.TextField(blank=True, null=True)
56
-
57
- raw_meta = models.JSONField(blank=True, null=True)
58
-
59
- created_at = models.DateTimeField(auto_now_add=True)
68
+ report_file = models.ForeignKey(
69
+ "ReportFile",
70
+ on_delete=models.SET_NULL,
71
+ related_name="raw_pdf_files",
72
+ null=True,
73
+ blank=True,
74
+ )
60
75
 
61
- def __str__(self):
62
- str_repr = f"RawPdfFile: {self.file.name}"
63
- return str_repr
76
+ anonymized_text = models.TextField(blank=True, null=True)
64
77
 
65
78
  @classmethod
66
79
  def create_from_file(
67
80
  cls,
68
- file_path,
81
+ file_path: Path,
69
82
  center_name,
70
- pdf_type_name, # to be depreceated / changed since we now import all pdfs from same directory
71
- destination_dir,
72
83
  save=True,
84
+ delete_source=True,
73
85
  ):
74
- from endoreg_db.models import PdfType, Center
86
+ from endoreg_db.models import Center
87
+
75
88
  logger.info(f"Creating RawPdfFile object from file: {file_path}")
76
- original_file_name = file_path.name
89
+ ic(f"Creating RawPdfFile object from file: {file_path}")
77
90
 
78
91
  new_file_name, uuid = get_uuid_filename(file_path)
79
92
 
80
- if not destination_dir.exists():
81
- destination_dir.mkdir(parents=True)
82
-
83
93
  pdf_hash = get_pdf_hash(file_path)
84
-
94
+ ic(pdf_hash)
95
+ new_file_path = RAW_PDF_DIR / new_file_name
85
96
  # check if pdf file already exists
97
+
86
98
  if cls.objects.filter(pdf_hash=pdf_hash).exists():
99
+ existing_pdf_file = cls.objects.filter(pdf_hash=pdf_hash).get()
87
100
  logger.warning(f"RawPdfFile with hash {pdf_hash} already exists")
88
- return None
89
-
90
- assert pdf_type_name is not None, "pdf_type_name is required"
101
+ ic(f"RawPdfFile with hash {pdf_hash} already exists")
102
+
103
+ existing_pdf_file.verify_existing_file(fallback_file=file_path)
104
+
105
+ return existing_pdf_file
106
+
107
+ else:
108
+ ic(f"No existing pdf file found for hash {pdf_hash}")
109
+
110
+ # assert pdf_type_name is not None, "pdf_type_name is required"
91
111
  assert center_name is not None, "center_name is required"
92
112
 
93
- pdf_type = PdfType.objects.get(name=pdf_type_name)
113
+ # pdf_type = PdfType.objects.get(name=pdf_type_name)
94
114
  center = Center.objects.get(name=center_name)
95
115
 
96
- new_file_path = destination_dir / new_file_name
97
-
98
116
  logger.info(f"Copying file to {new_file_path}")
99
- success = shutil.copy(file_path, new_file_path)
100
-
117
+ ic(f"Copying file to {new_file_path}")
118
+ _success = shutil.copy(file_path, new_file_path)
119
+
101
120
  # validate copy operation by comparing hashs
102
121
  assert get_pdf_hash(new_file_path) == pdf_hash, "Copy operation failed"
103
122
 
104
123
  raw_pdf = cls(
105
124
  file=new_file_path.resolve().as_posix(),
106
125
  pdf_hash=pdf_hash,
107
- pdf_type=pdf_type,
126
+ # pdf_type=pdf_type,
108
127
  center=center,
109
128
  )
129
+ raw_pdf.save()
110
130
  logger.info(f"RawPdfFile object created: {raw_pdf}")
131
+ ic(f"RawPdfFile object created: {raw_pdf}")
111
132
 
112
133
  # remove source file
113
- file_path.unlink()
114
- logger.info(f"Source file removed: {file_path}")
134
+ if delete_source:
135
+ file_path.unlink()
136
+ logger.info(f"Source file removed: {file_path}")
137
+ ic(f"Source file removed: {file_path}")
115
138
 
116
139
  if save:
117
140
  raw_pdf.save()
118
-
119
141
 
120
142
  return raw_pdf
121
143
 
122
- def process_file(self, verbose = False):
123
-
144
+ def save(self, *args, **kwargs):
145
+ if not self.file.name.endswith(".pdf"):
146
+ raise ValidationError("Only PDF files are allowed")
147
+
148
+ if not self.pdf_hash:
149
+ self.pdf_hash = get_pdf_hash(self.file.path)
150
+
151
+ super().save(*args, **kwargs)
152
+
153
+ def verify_existing_file(self, fallback_file):
154
+ if not Path(self.file.path).exists():
155
+ logger.warning(f"File not found: {self.file.path}")
156
+ logger.warning(f"Using fallback file: {fallback_file}")
157
+ ic(f"File not found: {self.file.path}")
158
+ ic(f"Copy fallback file: {fallback_file} to existing filepath")
159
+
160
+ shutil.copy(fallback_file, self.file.path)
161
+
162
+ self.save()
163
+
164
+ def process_file(self, verbose=False):
124
165
  pdf_path = self.file.path
125
166
  rr_config = self.get_report_reader_config()
126
167
 
127
- rr = ReportReader(**rr_config) #FIXME In future we need to pass a configuration file
128
- # This configuration file should be associated with pdf type
168
+ rr = ReportReader(
169
+ **rr_config
170
+ ) # FIXME In future we need to pass a configuration file
171
+ # This configuration file should be associated with pdf type
172
+
173
+ text, anonymized_text, report_meta = rr.process_report(
174
+ pdf_path, verbose=verbose
175
+ )
129
176
 
130
- text, anonymized_text, report_meta = rr.process_report(pdf_path, verbose=verbose)
177
+ self.text = text
178
+ self.anonymized_text = anonymized_text
179
+
180
+ report_meta["center_name"] = self.center.name
131
181
  if not self.sensitive_meta:
132
182
  sensitive_meta = SensitiveMeta.create_from_dict(report_meta)
133
- sensitive_meta.save()
134
183
  self.sensitive_meta = sensitive_meta
135
184
 
136
- else:
185
+ else:
137
186
  # update existing sensitive meta
138
187
  sensitive_meta = self.sensitive_meta
139
188
  sensitive_meta.update_from_dict(report_meta)
140
189
 
141
- return text, anonymized_text, report_meta
142
-
143
- def update(self, save=True, verbose = True):
144
- try:
145
- self.text, self.anonymized_text, self.raw_meta = self.process_file(verbose = verbose)
146
- self.state_report_processed = True
147
- self.state_report_processing_required = False
148
-
149
- if save:
150
-
151
- self.save()
152
-
153
- return True
154
-
155
- except:
156
- logger.error(f"Error processing file: {self.file.path}")
157
- return False
190
+ self.raw_meta = report_meta
158
191
 
159
- def save(self, *args, **kwargs):
160
- if not self.file.name.endswith('.pdf'):
161
- raise ValidationError('Only PDF files are allowed')
162
-
163
- if not self.pdf_hash:
164
- self.pdf_hash = get_pdf_hash(self.file.path)
165
-
166
- super().save(*args, **kwargs)
192
+ sensitive_meta.save()
193
+ self.save()
167
194
 
195
+ return text, anonymized_text, report_meta
168
196
 
169
197
  def get_report_reader_config(self):
170
- if self.pdf_type.endoscope_info_line:
171
- endoscope_info_line = self.pdf_type.endoscope_info_line.value
198
+ from endoreg_db.models import PdfType, Center
199
+ from warnings import warn
200
+
201
+ if not self.pdf_type:
202
+ warn("PdfType not set, using default settings")
203
+ pdf_type = PdfType.default_pdf_type()
204
+ else:
205
+ pdf_type: PdfType = self.pdf_type
206
+ center: Center = self.center
207
+ if pdf_type.endoscope_info_line:
208
+ endoscope_info_line = pdf_type.endoscope_info_line.value
209
+
172
210
  else:
173
211
  endoscope_info_line = None
174
212
  settings_dict = {
175
213
  "locale": "de_DE",
176
- "employee_first_names": [_.name for _ in self.center.first_names.all()],
177
- "employee_last_names": [_.name for _ in self.center.last_names.all()],
178
- "text_date_format":'%d.%m.%Y',
214
+ "employee_first_names": [_.name for _ in center.first_names.all()],
215
+ "employee_last_names": [_.name for _ in center.last_names.all()],
216
+ "text_date_format": "%d.%m.%Y",
179
217
  "flags": {
180
- "patient_info_line": self.pdf_type.patient_info_line.value,
218
+ "patient_info_line": pdf_type.patient_info_line.value,
181
219
  "endoscope_info_line": endoscope_info_line,
182
- "examiner_info_line": self.pdf_type.examiner_info_line.value,
183
- "cut_off_below": [_.value for _ in self.pdf_type.cut_off_below_lines.all()],
184
- "cut_off_above": [_.value for _ in self.pdf_type.cut_off_above_lines.all()],
185
- }
220
+ "examiner_info_line": pdf_type.examiner_info_line.value,
221
+ "cut_off_below": [_.value for _ in pdf_type.cut_off_below_lines.all()],
222
+ "cut_off_above": [_.value for _ in pdf_type.cut_off_above_lines.all()],
223
+ },
186
224
  }
187
225
 
188
226
  return settings_dict
227
+
228
+ def get_or_create_report_file(self):
229
+ from endoreg_db.models import ReportFile
230
+
231
+ if self.report_file:
232
+ report_file = self.report_file
233
+
234
+ elif ReportFile.objects.filter(pdf_hash=self.pdf_hash).exists():
235
+ report_file = ReportFile.objects.filter(pdf_hash=self.pdf_hash).get()
236
+ self.report_file = report_file
237
+ self.save()
238
+ else:
239
+ # TODO Make sure all required states are set
240
+ patient = self.sensitive_meta.get_or_create_pseudo_patient()
241
+ examiner = self.sensitive_meta.get_or_create_pseudo_examiner()
242
+ patient_examination = (
243
+ self.sensitive_meta.get_or_create_pseudo_patient_examination()
244
+ )
245
+
246
+ report_file = ReportFile.objects.create(
247
+ pdf_hash=self.pdf_hash,
248
+ center=self.center,
249
+ sensitive_meta=self.sensitive_meta,
250
+ patient=patient,
251
+ examiner=examiner,
252
+ examination=patient_examination,
253
+ text=self.anonymized_text,
254
+ )
255
+
256
+ report_file.save()
257
+ self.report_file = report_file
258
+ self.save()
259
+
260
+ return report_file