endoreg-db 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of endoreg-db might be problematic. Click here for more details.

Files changed (320) hide show
  1. endoreg_db/admin.py +90 -1
  2. endoreg_db/urls.py +173 -0
  3. endoreg_db/views.py +0 -3
  4. endoreg_db-0.6.0.dist-info/METADATA +151 -0
  5. endoreg_db-0.6.0.dist-info/RECORD +11 -0
  6. {endoreg_db-0.5.2.dist-info → endoreg_db-0.6.0.dist-info}/WHEEL +2 -1
  7. endoreg_db-0.6.0.dist-info/top_level.txt +1 -0
  8. endoreg_db/data/__init__.py +0 -72
  9. endoreg_db/data/active_model/data.yaml +0 -3
  10. endoreg_db/data/agl_service/data.yaml +0 -19
  11. endoreg_db/data/case_template/rule/00_patient_lab_sample_add_default_value.yaml +0 -167
  12. endoreg_db/data/case_template/rule/01_patient-set-age.yaml +0 -8
  13. endoreg_db/data/case_template/rule/01_patient-set-gender.yaml +0 -9
  14. endoreg_db/data/case_template/rule/11_create_patient_lab_sample.yaml +0 -23
  15. endoreg_db/data/case_template/rule/12_create-patient_medication-anticoagulation.yaml +0 -19
  16. endoreg_db/data/case_template/rule/13_create-patient_medication_schedule-anticoagulation.yaml +0 -19
  17. endoreg_db/data/case_template/rule/19_create_patient.yaml +0 -17
  18. endoreg_db/data/case_template/rule_type/base_types.yaml +0 -35
  19. endoreg_db/data/case_template/rule_value/.init +0 -0
  20. endoreg_db/data/case_template/rule_value_type/base_types.yaml +0 -59
  21. endoreg_db/data/case_template/template/base.yaml +0 -8
  22. endoreg_db/data/case_template/template_type/pre_endoscopy.yaml +0 -3
  23. endoreg_db/data/case_template/tmp/_rule_value +0 -13
  24. endoreg_db/data/case_template/tmp/rule/01_atrial_fibrillation.yaml +0 -21
  25. endoreg_db/data/case_template/tmp/rule/02_create_object.yaml +0 -10
  26. endoreg_db/data/case_template/tmp/template/atrial_fibrillation_low_risk.yaml +0 -7
  27. endoreg_db/data/center/data.yaml +0 -60
  28. endoreg_db/data/center_resource/green_endoscopy_dashboard_CenterResource.yaml +0 -144
  29. endoreg_db/data/center_waste/green_endoscopy_dashboard_CenterWaste.yaml +0 -48
  30. endoreg_db/data/disease/cardiovascular.yaml +0 -37
  31. endoreg_db/data/disease/hepatology.yaml +0 -5
  32. endoreg_db/data/disease/misc.yaml +0 -6
  33. endoreg_db/data/disease/renal.yaml +0 -5
  34. endoreg_db/data/disease_classification/chronic_kidney_disease.yaml +0 -6
  35. endoreg_db/data/disease_classification/coronary_vessel_disease.yaml +0 -6
  36. endoreg_db/data/disease_classification_choice/chronic_kidney_disease.yaml +0 -41
  37. endoreg_db/data/disease_classification_choice/coronary_vessel_disease.yaml +0 -20
  38. endoreg_db/data/distribution/date/patient.yaml +0 -7
  39. endoreg_db/data/distribution/multiple_categorical/.init +0 -0
  40. endoreg_db/data/distribution/numeric/.init +0 -0
  41. endoreg_db/data/distribution/single_categorical/patient.yaml +0 -7
  42. endoreg_db/data/emission_factor/green_endoscopy_dashboard_EmissionFactor.yaml +0 -132
  43. endoreg_db/data/endoscope_type/data.yaml +0 -11
  44. endoreg_db/data/endoscopy_processor/data.yaml +0 -47
  45. endoreg_db/data/event/cardiology.yaml +0 -28
  46. endoreg_db/data/event/neurology.yaml +0 -14
  47. endoreg_db/data/event/surgery.yaml +0 -13
  48. endoreg_db/data/event/thrombembolism.yaml +0 -20
  49. endoreg_db/data/examination/examinations/data.yaml +0 -66
  50. endoreg_db/data/examination/time/data.yaml +0 -48
  51. endoreg_db/data/examination/time-type/data.yaml +0 -8
  52. endoreg_db/data/examination/type/data.yaml +0 -5
  53. endoreg_db/data/gender/data.yaml +0 -18
  54. endoreg_db/data/information_source/data.yaml +0 -30
  55. endoreg_db/data/information_source/medication.yaml +0 -6
  56. endoreg_db/data/lab_value/cardiac_enzymes.yaml +0 -31
  57. endoreg_db/data/lab_value/coagulation.yaml +0 -49
  58. endoreg_db/data/lab_value/electrolytes.yaml +0 -190
  59. endoreg_db/data/lab_value/gastrointestinal_function.yaml +0 -121
  60. endoreg_db/data/lab_value/hematology.yaml +0 -169
  61. endoreg_db/data/lab_value/hormones.yaml +0 -53
  62. endoreg_db/data/lab_value/lipids.yaml +0 -44
  63. endoreg_db/data/lab_value/misc.yaml +0 -30
  64. endoreg_db/data/lab_value/renal_function.yaml +0 -11
  65. endoreg_db/data/label/label/data.yaml +0 -62
  66. endoreg_db/data/label/label-set/data.yaml +0 -18
  67. endoreg_db/data/label/label-type/data.yaml +0 -7
  68. endoreg_db/data/log_type/data.yaml +0 -57
  69. endoreg_db/data/material/material.yaml +0 -91
  70. endoreg_db/data/medication/anticoagulation.yaml +0 -65
  71. endoreg_db/data/medication/tah.yaml +0 -70
  72. endoreg_db/data/medication_indication/anticoagulation.yaml +0 -120
  73. endoreg_db/data/medication_indication_type/data.yaml +0 -11
  74. endoreg_db/data/medication_indication_type/thrombembolism.yaml +0 -41
  75. endoreg_db/data/medication_intake_time/base.yaml +0 -31
  76. endoreg_db/data/medication_schedule/apixaban.yaml +0 -95
  77. endoreg_db/data/medication_schedule/ass.yaml +0 -12
  78. endoreg_db/data/medication_schedule/enoxaparin.yaml +0 -26
  79. endoreg_db/data/model_type/data.yaml +0 -7
  80. endoreg_db/data/network_device/data.yaml +0 -29
  81. endoreg_db/data/network_device_type/data.yaml +0 -12
  82. endoreg_db/data/patient_lab_sample_type/generic.yaml +0 -6
  83. endoreg_db/data/pdf_type/data.yaml +0 -28
  84. endoreg_db/data/product/green_endoscopy_dashboard_Product.yaml +0 -66
  85. endoreg_db/data/product_group/green_endoscopy_dashboard_ProductGroup.yaml +0 -33
  86. endoreg_db/data/product_material/green_endoscopy_dashboard_ProductMaterial.yaml +0 -308
  87. endoreg_db/data/product_weight/green_endoscopy_dashboard_ProductWeight.yaml +0 -88
  88. endoreg_db/data/profession/data.yaml +0 -70
  89. endoreg_db/data/reference_product/green_endoscopy_dashboard_ReferenceProduct.yaml +0 -55
  90. endoreg_db/data/report_reader_flag/ukw-examination-generic.yaml +0 -26
  91. endoreg_db/data/report_reader_flag/ukw-histology-generic.yaml +0 -19
  92. endoreg_db/data/resource/green_endoscopy_dashboard_Resource.yaml +0 -15
  93. endoreg_db/data/tmp/chronic_kidney_disease.yaml +0 -0
  94. endoreg_db/data/tmp/congestive_heart_failure.yaml +0 -0
  95. endoreg_db/data/transport_route/green_endoscopy_dashboard_TransportRoute.yaml +0 -12
  96. endoreg_db/data/unit/concentration.yaml +0 -92
  97. endoreg_db/data/unit/data.yaml +0 -17
  98. endoreg_db/data/unit/length.yaml +0 -31
  99. endoreg_db/data/unit/misc.yaml +0 -20
  100. endoreg_db/data/unit/rate.yaml +0 -6
  101. endoreg_db/data/unit/time.yaml +0 -13
  102. endoreg_db/data/unit/volume.yaml +0 -35
  103. endoreg_db/data/unit/weight.yaml +0 -38
  104. endoreg_db/data/waste/data.yaml +0 -12
  105. endoreg_db/forms/__init__.py +0 -3
  106. endoreg_db/forms/questionnaires/__init__.py +0 -1
  107. endoreg_db/forms/questionnaires/tto_questionnaire.py +0 -23
  108. endoreg_db/forms/settings/__init__.py +0 -8
  109. endoreg_db/forms/unit.py +0 -6
  110. endoreg_db/management/commands/_load_model_template.py +0 -41
  111. endoreg_db/management/commands/delete_all.py +0 -18
  112. endoreg_db/management/commands/delete_legacy_images.py +0 -19
  113. endoreg_db/management/commands/delete_legacy_videos.py +0 -17
  114. endoreg_db/management/commands/extract_legacy_video_frames.py +0 -18
  115. endoreg_db/management/commands/fetch_legacy_image_dataset.py +0 -32
  116. endoreg_db/management/commands/fix_auth_permission.py +0 -20
  117. endoreg_db/management/commands/import_legacy_images.py +0 -94
  118. endoreg_db/management/commands/import_legacy_videos.py +0 -76
  119. endoreg_db/management/commands/load_active_model_data.py +0 -45
  120. endoreg_db/management/commands/load_ai_model_data.py +0 -45
  121. endoreg_db/management/commands/load_base_db_data.py +0 -136
  122. endoreg_db/management/commands/load_center_data.py +0 -43
  123. endoreg_db/management/commands/load_disease_classification_choices_data.py +0 -41
  124. endoreg_db/management/commands/load_disease_classification_data.py +0 -41
  125. endoreg_db/management/commands/load_disease_data.py +0 -40
  126. endoreg_db/management/commands/load_distribution_data.py +0 -66
  127. endoreg_db/management/commands/load_endoscope_type_data.py +0 -45
  128. endoreg_db/management/commands/load_endoscopy_processor_data.py +0 -45
  129. endoreg_db/management/commands/load_event_data.py +0 -41
  130. endoreg_db/management/commands/load_examination_data.py +0 -75
  131. endoreg_db/management/commands/load_g_play_data.py +0 -113
  132. endoreg_db/management/commands/load_gender_data.py +0 -44
  133. endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +0 -133
  134. endoreg_db/management/commands/load_information_source.py +0 -45
  135. endoreg_db/management/commands/load_lab_value_data.py +0 -50
  136. endoreg_db/management/commands/load_label_data.py +0 -67
  137. endoreg_db/management/commands/load_logging_data.py +0 -39
  138. endoreg_db/management/commands/load_medication_data.py +0 -41
  139. endoreg_db/management/commands/load_medication_indication_data.py +0 -63
  140. endoreg_db/management/commands/load_medication_indication_type_data.py +0 -41
  141. endoreg_db/management/commands/load_medication_intake_time_data.py +0 -41
  142. endoreg_db/management/commands/load_medication_schedule_data.py +0 -55
  143. endoreg_db/management/commands/load_network_data.py +0 -57
  144. endoreg_db/management/commands/load_pdf_type_data.py +0 -61
  145. endoreg_db/management/commands/load_profession_data.py +0 -44
  146. endoreg_db/management/commands/load_report_reader_flag.py +0 -46
  147. endoreg_db/management/commands/load_unit_data.py +0 -46
  148. endoreg_db/management/commands/load_user_groups.py +0 -28
  149. endoreg_db/management/commands/register_ai_model.py +0 -64
  150. endoreg_db/management/commands/reset_celery_schedule.py +0 -9
  151. endoreg_db/migrations/0001_initial.py +0 -1567
  152. endoreg_db/migrations/0002_anonymizedimagelabel_anonymousimageannotation_and_more.py +0 -55
  153. endoreg_db/migrations/0003_anonymousimageannotation_original_image_url_and_more.py +0 -39
  154. endoreg_db/migrations/0004_alter_rawpdffile_file.py +0 -20
  155. endoreg_db/migrations/0005_uploadedfile_alter_rawpdffile_file_anonymizedfile.py +0 -40
  156. endoreg_db/migrations/0006_alter_rawpdffile_file.py +0 -20
  157. endoreg_db/migrations/0007_networkdevicelogentry_datetime_and_more.py +0 -43
  158. endoreg_db/migrations/0008_networkdevicelogentry_aglnet_ip_and_more.py +0 -28
  159. endoreg_db/migrations/0009_alter_networkdevicelogentry_vpn_service_status.py +0 -18
  160. endoreg_db/migrations/0010_remove_networkdevicelogentry_hostname.py +0 -17
  161. endoreg_db/migrations/__init__.py +0 -0
  162. endoreg_db/models/__init__.py +0 -73
  163. endoreg_db/models/ai_model/__init__.py +0 -3
  164. endoreg_db/models/ai_model/active_model.py +0 -9
  165. endoreg_db/models/ai_model/model_meta.py +0 -24
  166. endoreg_db/models/ai_model/model_type.py +0 -26
  167. endoreg_db/models/ai_model/utils.py +0 -8
  168. endoreg_db/models/annotation/__init__.py +0 -3
  169. endoreg_db/models/annotation/anonymized_image_annotation.py +0 -60
  170. endoreg_db/models/annotation/binary_classification_annotation_task.py +0 -80
  171. endoreg_db/models/annotation/image_classification.py +0 -27
  172. endoreg_db/models/case_template/__init__.py +0 -6
  173. endoreg_db/models/case_template/case_template.py +0 -81
  174. endoreg_db/models/case_template/case_template_rule.py +0 -276
  175. endoreg_db/models/case_template/case_template_rule_value.py +0 -73
  176. endoreg_db/models/case_template/case_template_type.py +0 -28
  177. endoreg_db/models/center/__init__.py +0 -4
  178. endoreg_db/models/center/center.py +0 -25
  179. endoreg_db/models/center/center_product.py +0 -34
  180. endoreg_db/models/center/center_resource.py +0 -19
  181. endoreg_db/models/center/center_waste.py +0 -11
  182. endoreg_db/models/data_file/__init__.py +0 -6
  183. endoreg_db/models/data_file/base_classes/__init__.py +0 -2
  184. endoreg_db/models/data_file/base_classes/abstract_frame.py +0 -51
  185. endoreg_db/models/data_file/base_classes/abstract_video.py +0 -201
  186. endoreg_db/models/data_file/frame.py +0 -45
  187. endoreg_db/models/data_file/import_classes/__init__.py +0 -32
  188. endoreg_db/models/data_file/import_classes/processing_functions/__init__.py +0 -35
  189. endoreg_db/models/data_file/import_classes/processing_functions/pdf.py +0 -28
  190. endoreg_db/models/data_file/import_classes/processing_functions/video.py +0 -260
  191. endoreg_db/models/data_file/import_classes/raw_pdf.py +0 -188
  192. endoreg_db/models/data_file/import_classes/raw_video.py +0 -343
  193. endoreg_db/models/data_file/metadata/__init__.py +0 -3
  194. endoreg_db/models/data_file/metadata/pdf_meta.py +0 -70
  195. endoreg_db/models/data_file/metadata/sensitive_meta.py +0 -31
  196. endoreg_db/models/data_file/metadata/video_meta.py +0 -133
  197. endoreg_db/models/data_file/report_file.py +0 -89
  198. endoreg_db/models/data_file/video/__init__.py +0 -7
  199. endoreg_db/models/data_file/video/import_meta.py +0 -25
  200. endoreg_db/models/data_file/video/video.py +0 -25
  201. endoreg_db/models/data_file/video_segment.py +0 -107
  202. endoreg_db/models/disease.py +0 -56
  203. endoreg_db/models/emission/__init__.py +0 -1
  204. endoreg_db/models/emission/emission_factor.py +0 -20
  205. endoreg_db/models/event.py +0 -22
  206. endoreg_db/models/examination/__init__.py +0 -4
  207. endoreg_db/models/examination/examination.py +0 -26
  208. endoreg_db/models/examination/examination_time.py +0 -27
  209. endoreg_db/models/examination/examination_time_type.py +0 -24
  210. endoreg_db/models/examination/examination_type.py +0 -18
  211. endoreg_db/models/hardware/__init__.py +0 -2
  212. endoreg_db/models/hardware/endoscope.py +0 -44
  213. endoreg_db/models/hardware/endoscopy_processor.py +0 -143
  214. endoreg_db/models/information_source.py +0 -29
  215. endoreg_db/models/label/__init__.py +0 -1
  216. endoreg_db/models/label/label.py +0 -84
  217. endoreg_db/models/laboratory/__init__.py +0 -1
  218. endoreg_db/models/laboratory/lab_value.py +0 -102
  219. endoreg_db/models/legacy_data/__init__.py +0 -3
  220. endoreg_db/models/legacy_data/image.py +0 -34
  221. endoreg_db/models/logging/__init__.py +0 -4
  222. endoreg_db/models/logging/agl_service.py +0 -19
  223. endoreg_db/models/logging/base.py +0 -22
  224. endoreg_db/models/logging/log_type.py +0 -23
  225. endoreg_db/models/logging/network_device.py +0 -27
  226. endoreg_db/models/medication/__init__.py +0 -1
  227. endoreg_db/models/medication/medication.py +0 -148
  228. endoreg_db/models/network/__init__.py +0 -3
  229. endoreg_db/models/network/agl_service.py +0 -38
  230. endoreg_db/models/network/network_device.py +0 -53
  231. endoreg_db/models/network/network_device_type.py +0 -23
  232. endoreg_db/models/other/__init__.py +0 -5
  233. endoreg_db/models/other/distribution.py +0 -215
  234. endoreg_db/models/other/material.py +0 -16
  235. endoreg_db/models/other/resource.py +0 -18
  236. endoreg_db/models/other/transport_route.py +0 -21
  237. endoreg_db/models/other/waste.py +0 -20
  238. endoreg_db/models/patient_examination/__init__.py +0 -35
  239. endoreg_db/models/permissions/__init__.py +0 -44
  240. endoreg_db/models/persons/__init__.py +0 -7
  241. endoreg_db/models/persons/examiner/__init__.py +0 -2
  242. endoreg_db/models/persons/examiner/examiner.py +0 -16
  243. endoreg_db/models/persons/examiner/examiner_type.py +0 -2
  244. endoreg_db/models/persons/first_name.py +0 -18
  245. endoreg_db/models/persons/gender.py +0 -22
  246. endoreg_db/models/persons/last_name.py +0 -20
  247. endoreg_db/models/persons/patient/__init__.py +0 -8
  248. endoreg_db/models/persons/patient/case/__init__.py +0 -0
  249. endoreg_db/models/persons/patient/case/case.py +0 -30
  250. endoreg_db/models/persons/patient/patient.py +0 -216
  251. endoreg_db/models/persons/patient/patient_disease.py +0 -16
  252. endoreg_db/models/persons/patient/patient_event.py +0 -22
  253. endoreg_db/models/persons/patient/patient_lab_sample.py +0 -106
  254. endoreg_db/models/persons/patient/patient_lab_value.py +0 -176
  255. endoreg_db/models/persons/patient/patient_medication.py +0 -44
  256. endoreg_db/models/persons/patient/patient_medication_schedule.py +0 -28
  257. endoreg_db/models/persons/person.py +0 -31
  258. endoreg_db/models/persons/portal_user_information.py +0 -27
  259. endoreg_db/models/prediction/__init__.py +0 -2
  260. endoreg_db/models/prediction/image_classification.py +0 -37
  261. endoreg_db/models/prediction/video_prediction_meta.py +0 -244
  262. endoreg_db/models/product/__init__.py +0 -5
  263. endoreg_db/models/product/product.py +0 -97
  264. endoreg_db/models/product/product_group.py +0 -19
  265. endoreg_db/models/product/product_material.py +0 -24
  266. endoreg_db/models/product/product_weight.py +0 -26
  267. endoreg_db/models/product/reference_product.py +0 -99
  268. endoreg_db/models/questionnaires/__init__.py +0 -114
  269. endoreg_db/models/quiz/__init__.py +0 -2
  270. endoreg_db/models/quiz/quiz_answer.py +0 -41
  271. endoreg_db/models/quiz/quiz_question.py +0 -54
  272. endoreg_db/models/report_reader/__init__.py +0 -2
  273. endoreg_db/models/report_reader/report_reader_config.py +0 -53
  274. endoreg_db/models/report_reader/report_reader_flag.py +0 -20
  275. endoreg_db/models/rules/__init__.py +0 -5
  276. endoreg_db/models/rules/rule.py +0 -24
  277. endoreg_db/models/rules/rule_applicator.py +0 -224
  278. endoreg_db/models/rules/rule_attribute_dtype.py +0 -19
  279. endoreg_db/models/rules/rule_type.py +0 -22
  280. endoreg_db/models/rules/ruleset.py +0 -19
  281. endoreg_db/models/unit.py +0 -22
  282. endoreg_db/queries/__init__.py +0 -5
  283. endoreg_db/queries/annotations/__init__.py +0 -3
  284. endoreg_db/queries/annotations/legacy.py +0 -158
  285. endoreg_db/queries/get/__init__.py +0 -6
  286. endoreg_db/queries/get/annotation.py +0 -0
  287. endoreg_db/queries/get/center.py +0 -42
  288. endoreg_db/queries/get/model.py +0 -13
  289. endoreg_db/queries/get/patient.py +0 -14
  290. endoreg_db/queries/get/patient_examination.py +0 -20
  291. endoreg_db/queries/get/prediction.py +0 -0
  292. endoreg_db/queries/get/report_file.py +0 -33
  293. endoreg_db/queries/get/video.py +0 -31
  294. endoreg_db/queries/get/video_import_meta.py +0 -0
  295. endoreg_db/queries/get/video_prediction_meta.py +0 -0
  296. endoreg_db/queries/sanity/__init_.py +0 -0
  297. endoreg_db/serializers/__init__.py +0 -10
  298. endoreg_db/serializers/ai_model.py +0 -19
  299. endoreg_db/serializers/annotation.py +0 -17
  300. endoreg_db/serializers/center.py +0 -11
  301. endoreg_db/serializers/examination.py +0 -33
  302. endoreg_db/serializers/frame.py +0 -13
  303. endoreg_db/serializers/hardware.py +0 -21
  304. endoreg_db/serializers/label.py +0 -22
  305. endoreg_db/serializers/patient.py +0 -10
  306. endoreg_db/serializers/prediction.py +0 -15
  307. endoreg_db/serializers/report_file.py +0 -7
  308. endoreg_db/serializers/video.py +0 -27
  309. endoreg_db/utils/__init__.py +0 -1
  310. endoreg_db/utils/cropping.py +0 -29
  311. endoreg_db/utils/dataloader.py +0 -92
  312. endoreg_db/utils/file_operations.py +0 -30
  313. endoreg_db/utils/hashs.py +0 -34
  314. endoreg_db/utils/legacy_ocr.py +0 -201
  315. endoreg_db/utils/ocr.py +0 -190
  316. endoreg_db/utils/uuid.py +0 -4
  317. endoreg_db/utils/video_metadata.py +0 -87
  318. endoreg_db-0.5.2.dist-info/METADATA +0 -27
  319. endoreg_db-0.5.2.dist-info/RECORD +0 -319
  320. {endoreg_db-0.5.2.dist-info/licenses → endoreg_db-0.6.0.dist-info}/LICENSE +0 -0
@@ -1,188 +0,0 @@
1
- # models/data_file/import_classes/raw_pdf.py
2
- # django db model "RawPdf"
3
- # Class to store raw pdf file using django file field
4
- # Class contains classmethod to create object from pdf file
5
- # Objects provide methods to extract text, extract metadata from the text, and anonymize the text of a pdf file using the agl_report_reader.ReportReader class
6
- # ------------------------------------------------------------------------------
7
-
8
- from django.db import models
9
- from django.core.files.storage import FileSystemStorage
10
- from django.core.files import File
11
- from django.conf import settings
12
- from django.utils import timezone
13
- from django.core.exceptions import ValidationError
14
- from django.core.validators import FileExtensionValidator
15
- from endoreg_db.utils.file_operations import get_uuid_filename
16
-
17
- from agl_report_reader.report_reader import ReportReader
18
-
19
- from endoreg_db.utils.hashs import get_pdf_hash
20
- from ..metadata import SensitiveMeta
21
-
22
- # setup logging to pdf_import.log
23
- import logging
24
- logger = logging.getLogger('pdf_import')
25
-
26
- import shutil
27
-
28
- # get pdf location from settings, defaulting to settings.BASE_DIR / 'erc_data/raw_pdf' (the directory is not created here)
29
- PSEUDO_DIR_RAW_PDF = getattr(settings, 'PSEUDO_DIR_RAW_PDF', settings.BASE_DIR / 'erc_data/raw_pdf')
30
-
31
class RawPdfFile(models.Model):
    """Django model for a raw PDF report file together with its processing results.

    The PDF itself lives in a ``FileSystemStorage`` rooted at
    ``PSEUDO_DIR_RAW_PDF``. Instances are normally created through
    :meth:`create_from_file` and processed through :meth:`update`, which runs
    ``agl_report_reader``'s ``ReportReader`` on the stored file.
    """

    file = models.FileField(
        upload_to='raw_pdf/',
        validators=[FileExtensionValidator(allowed_extensions=['pdf'])],
        storage=FileSystemStorage(location=PSEUDO_DIR_RAW_PDF.resolve().as_posix()),
    )

    # Hash of the PDF as computed by get_pdf_hash; unique, used for de-duplication.
    pdf_hash = models.CharField(max_length=255, unique=True)
    pdf_type = models.ForeignKey('PdfType', on_delete=models.CASCADE)
    center = models.ForeignKey('Center', on_delete=models.CASCADE)

    # Processing state flags, flipped by update() after a successful run.
    state_report_processing_required = models.BooleanField(default=True)
    state_report_processed = models.BooleanField(default=False)

    # report_file = models.OneToOneField("ReportFile", on_delete=models.CASCADE, null=True, blank=True)
    sensitive_meta = models.OneToOneField(
        'SensitiveMeta',
        on_delete=models.CASCADE,
        related_name='raw_pdf_file',
        null=True,
        blank=True,
    )

    # Results returned by ReportReader.process_report (see process_file/update).
    text = models.TextField(blank=True, null=True)
    anonymized_text = models.TextField(blank=True, null=True)

    raw_meta = models.JSONField(blank=True, null=True)

    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Return ``"RawPdfFile: <stored file name>"``."""
        return f"RawPdfFile: {self.file.name}"

    @classmethod
    def create_from_file(
        cls,
        file_path,
        center_name,
        pdf_type_name,  # to be deprecated / changed since we now import all pdfs from same directory
        destination_dir,
        save=True,
    ):
        """Copy a PDF into ``destination_dir`` and build a ``RawPdfFile`` for it.

        The file is copied under the name produced by ``get_uuid_filename``,
        the copy is verified by comparing hashes, and only then is the source
        file removed.

        Args:
            file_path: Path object of the source PDF (``.name`` and
                ``.unlink()`` are used).
            center_name: Name of an existing ``Center``.
            pdf_type_name: Name of an existing ``PdfType``.
            destination_dir: Path object of the directory to copy into; it is
                created if missing.
            save: When true (default), the new object is saved to the database.

        Returns:
            The new ``RawPdfFile``, or ``None`` if an object with the same
            hash already exists (nothing is copied or removed in that case).

        Raises:
            AssertionError: If ``pdf_type_name`` or ``center_name`` is ``None``
                or the copied file's hash differs from the source hash.
            PdfType.DoesNotExist / Center.DoesNotExist: If no row matches the
                given names.
        """
        from endoreg_db.models import PdfType, Center

        logger.info(f"Creating RawPdfFile object from file: {file_path}")

        new_file_name, _ = get_uuid_filename(file_path)

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        destination_dir.mkdir(parents=True, exist_ok=True)

        pdf_hash = get_pdf_hash(file_path)

        # check if pdf file already exists
        if cls.objects.filter(pdf_hash=pdf_hash).exists():
            logger.warning(f"RawPdfFile with hash {pdf_hash} already exists")
            return None

        # Explicit raises instead of ``assert`` so the checks survive ``python -O``;
        # AssertionError is kept so existing callers see the same exception type.
        if pdf_type_name is None:
            raise AssertionError("pdf_type_name is required")
        if center_name is None:
            raise AssertionError("center_name is required")

        pdf_type = PdfType.objects.get(name=pdf_type_name)
        center = Center.objects.get(name=center_name)

        new_file_path = destination_dir / new_file_name

        logger.info(f"Copying file to {new_file_path}")
        shutil.copy(file_path, new_file_path)

        # Validate the copy by comparing hashes. This guards the deletion of the
        # source below, so it must never be optimized away.
        if get_pdf_hash(new_file_path) != pdf_hash:
            raise AssertionError("Copy operation failed")

        raw_pdf = cls(
            file=new_file_path.resolve().as_posix(),
            pdf_hash=pdf_hash,
            pdf_type=pdf_type,
            center=center,
        )
        logger.info(f"RawPdfFile object created: {raw_pdf}")

        # Persist first: if saving fails, the source file is still in place.
        if save:
            raw_pdf.save()

        # remove source file
        file_path.unlink()
        logger.info(f"Source file removed: {file_path}")

        return raw_pdf

    def process_file(self, verbose=False):
        """Run the report reader on the stored PDF and sync ``sensitive_meta``.

        A ``ReportReader`` is built from :meth:`get_report_reader_config` and
        its ``process_report`` is called with the stored file's path. If this
        object has no ``sensitive_meta`` yet, one is created from the returned
        metadata, saved and assigned (this object itself is not saved here);
        otherwise the existing one is updated via ``update_from_dict``.

        Args:
            verbose: Forwarded to ``ReportReader.process_report``.

        Returns:
            The tuple ``(text, anonymized_text, report_meta)`` returned by
            ``process_report``.
        """
        pdf_path = self.file.path
        rr_config = self.get_report_reader_config()

        # FIXME In future we need to pass a configuration file.
        # This configuration file should be associated with pdf type.
        rr = ReportReader(**rr_config)

        text, anonymized_text, report_meta = rr.process_report(pdf_path, verbose=verbose)
        if not self.sensitive_meta:
            sensitive_meta = SensitiveMeta.create_from_dict(report_meta)
            sensitive_meta.save()
            self.sensitive_meta = sensitive_meta
        else:
            # update existing sensitive meta
            self.sensitive_meta.update_from_dict(report_meta)

        return text, anonymized_text, report_meta

    def update(self, save=True, verbose=True):
        """Process the PDF and store the results on this object.

        On success ``text``, ``anonymized_text`` and ``raw_meta`` are set and
        the processing state flags are flipped.

        Args:
            save: When true (default), the object is saved after processing.
            verbose: Forwarded to :meth:`process_file`.

        Returns:
            ``True`` on success, ``False`` if processing or saving raised.
        """
        try:
            self.text, self.anonymized_text, self.raw_meta = self.process_file(verbose=verbose)
            self.state_report_processed = True
            self.state_report_processing_required = False

            if save:
                self.save()

            return True

        except Exception:
            # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate; logger.exception records the traceback at ERROR level.
            logger.exception(f"Error processing file: {self.file.path}")
            return False

    def save(self, *args, **kwargs):
        """Validate the file extension, fill in a missing hash, then save.

        Raises:
            ValidationError: If the stored file name does not end in ``.pdf``
                (compared case-insensitively, consistent with the
                ``FileExtensionValidator`` on ``file``).
        """
        if not self.file.name.lower().endswith('.pdf'):
            raise ValidationError('Only PDF files are allowed')

        if not self.pdf_hash:
            self.pdf_hash = get_pdf_hash(self.file.path)

        super().save(*args, **kwargs)

    def get_report_reader_config(self):
        """Build the keyword arguments passed to ``ReportReader``.

        Names come from the center's ``first_names`` / ``last_names`` and flag
        values from this object's ``pdf_type``. ``endoscope_info_line`` is
        optional and becomes ``None`` when the pdf type has none.

        Returns:
            dict with keys ``locale``, ``employee_first_names``,
            ``employee_last_names``, ``text_date_format`` and ``flags``.
        """
        pdf_type = self.pdf_type

        if pdf_type.endoscope_info_line:
            endoscope_info_line = pdf_type.endoscope_info_line.value
        else:
            endoscope_info_line = None

        settings_dict = {
            "locale": "de_DE",
            "employee_first_names": [first.name for first in self.center.first_names.all()],
            "employee_last_names": [last.name for last in self.center.last_names.all()],
            "text_date_format": '%d.%m.%Y',
            "flags": {
                "patient_info_line": pdf_type.patient_info_line.value,
                "endoscope_info_line": endoscope_info_line,
                "examiner_info_line": pdf_type.examiner_info_line.value,
                "cut_off_below": [line.value for line in pdf_type.cut_off_below_lines.all()],
                "cut_off_above": [line.value for line in pdf_type.cut_off_above_lines.all()],
            },
        }

        return settings_dict
@@ -1,343 +0,0 @@
1
- from django.db import models
2
- from pathlib import Path
3
- from collections import defaultdict, Counter
4
-
5
- from endoreg_db.utils.hashs import get_video_hash
6
- from endoreg_db.utils.file_operations import get_uuid_filename
7
- from endoreg_db.utils.ocr import extract_text_from_rois
8
-
9
- import shutil
10
- import os
11
- import subprocess
12
-
13
- from ..metadata import VideoMeta, SensitiveMeta
14
-
15
- class RawVideoFile(models.Model):
16
- uuid = models.UUIDField()
17
- file = models.FileField(upload_to="raw_data/")
18
-
19
- sensitive_meta = models.OneToOneField(
20
- "SensitiveMeta", on_delete=models.CASCADE, blank=True, null=True
21
- )
22
-
23
- center = models.ForeignKey("Center", on_delete=models.CASCADE)
24
- processor = models.ForeignKey(
25
- "EndoscopyProcessor", on_delete=models.CASCADE, blank=True, null=True
26
- )
27
- video_meta = models.OneToOneField(
28
- "VideoMeta", on_delete=models.CASCADE, blank=True, null=True
29
- )
30
- original_file_name = models.CharField(max_length=255)
31
- video_hash = models.CharField(max_length=255, unique=True)
32
- uploaded_at = models.DateTimeField(auto_now_add=True)
33
-
34
- # Frame Extraction States
35
- state_frames_required = models.BooleanField(default=True)
36
- state_frames_extracted = models.BooleanField(default=False)
37
-
38
- # Video
39
- ## Prediction
40
- state_initial_prediction_required = models.BooleanField(default=True)
41
- state_initial_prediction_completed = models.BooleanField(default=False)
42
- state_initial_prediction_import_required = models.BooleanField(default=True)
43
- state_initial_prediction_import_completed = models.BooleanField(default=False)
44
- ## OCR
45
- state_ocr_required = models.BooleanField(default=True)
46
- state_ocr_completed = models.BooleanField(default=False)
47
- ## Validation
48
- state_outside_validated = models.BooleanField(default=False)
49
- state_ocr_result_validated = models.BooleanField(default=False)
50
-
51
- state_sensitive_data_retrieved = models.BooleanField(default=False)
52
-
53
- # Dataset complete?
54
- state_histology_required = models.BooleanField(blank=True, null=True)
55
- state_histology_available = models.BooleanField(default=False)
56
- state_follow_up_intervention_required = models.BooleanField(blank=True, null=True)
57
- state_follow_up_intervention_available = models.BooleanField(default=False)
58
- state_dataset_complete = models.BooleanField(default=False)
59
-
60
- # Finalizing for Upload
61
- state_anonym_video_required = models.BooleanField(default=True)
62
- state_anonym_video_performed = models.BooleanField(default=False)
63
- state_original_reports_deleted = models.BooleanField(default=False)
64
- state_original_video_deleted = models.BooleanField(default=False)
65
- state_finalized = models.BooleanField(default=False)
66
-
67
- frame_dir = models.CharField(max_length=255)
68
- prediction_dir = models.CharField(max_length=255)
69
-
70
@classmethod
def create_from_file(
    cls,
    file_path: Path,
    video_dir: Path,
    center_name: str,
    processor_name: str,
    frame_dir_parent: Path,
    save: bool = True,
):
    """Move a video into ``video_dir`` and build a ``RawVideoFile`` for it.

    The file is moved under the name produced by ``get_uuid_filename`` and a
    frame directory ``frame_dir_parent / <uuid>`` is created for it.

    Args:
        file_path: Source video file.
        video_dir: Directory the video is moved into; created if missing.
        center_name: Name of an existing ``Center``.
        processor_name: Name of an existing ``EndoscopyProcessor``.
        frame_dir_parent: Parent directory for the per-video frame directory.
        save: When true (default), the new object is saved to the database.

    Returns:
        The new ``RawVideoFile``, or ``None`` if a video with the same hash
        already exists, the moved file is missing, or its hash differs from
        the source hash.

    Raises:
        Center.DoesNotExist / EndoscopyProcessor.DoesNotExist: If no row
            matches the given names.
    """
    from endoreg_db.models import Center, EndoscopyProcessor

    print(f"Creating RawVideoFile from {file_path}")
    original_file_name = file_path.name

    video_hash = get_video_hash(file_path)

    # Reject duplicates before touching the filesystem, so a duplicate does
    # not leave an orphaned moved file and an empty frame directory behind.
    if cls.objects.filter(video_hash=video_hash).exists():
        print(f"File with hash {video_hash} already exists")
        return None

    # .get() raises DoesNotExist when nothing matches; it never returns None.
    center = Center.objects.get(name=center_name)
    processor = EndoscopyProcessor.objects.get(name=processor_name)

    # Rename and move
    new_file_name, uuid = get_uuid_filename(file_path)
    framedir: Path = frame_dir_parent / str(uuid)
    framedir.mkdir(parents=True, exist_ok=True)
    video_dir.mkdir(parents=True, exist_ok=True)

    new_filepath = video_dir / new_file_name

    print(f"Moving {file_path} to {new_filepath}")
    shutil.move(file_path.resolve().as_posix(), new_filepath.resolve().as_posix())
    print(f"Moved to {new_filepath}")

    # Make sure file was transferred correctly and hash is correct
    if not new_filepath.exists():
        print(f"File {file_path} was not transferred correctly to {new_filepath}")
        return None

    if get_video_hash(new_filepath) != video_hash:
        print(f"Hash of file {file_path} is not correct")
        return None

    print(center)
    # Create a new instance of RawVideoFile
    raw_video_file = cls(
        uuid=uuid,
        file=new_filepath.resolve().as_posix(),
        center=center,
        processor=processor,
        original_file_name=original_file_name,
        video_hash=video_hash,
        frame_dir=framedir.as_posix(),
    )

    # Save the instance to the database unless the caller opted out
    if save:
        raw_video_file.save()

    return raw_video_file
142
-
143
def __str__(self):
    """Return the stored file's name as this object's string form."""
    file_name = self.file.name
    return file_name
145
-
146
def get_endo_roi(self):
    """Return whatever ``self.video_meta.get_endo_roi()`` reports."""
    return self.video_meta.get_endo_roi()
149
-
150
- # video meta should be created when video file is created
151
def save(self, *args, **kwargs):
    """
    Save the instance, creating a VideoMeta first when none is attached.

    The VideoMeta is created from this instance's center and processor.
    All positional and keyword arguments are forwarded to the parent
    ``save``.
    """
    if self.video_meta is None:
        self.video_meta = VideoMeta.objects.create(
            center=self.center,
            processor=self.processor,
        )
        # NOTE(review): assumed to belong to the "no metadata yet" branch
        # (only a freshly created VideoMeta gets initialized) - confirm.
        self.video_meta.initialize_ffmpeg_meta(self.file.path)
    super(RawVideoFile, self).save(*args, **kwargs)
160
-
161
def extract_frames(
    self,
    quality: int = 2,
    frame_dir: Path = None,
    overwrite: bool = False,
    ext="jpg",
):
    """
    Extract frames from the video file and save them to the frame_dir.

    For this, ffmpeg must be available in the current environment.
    Frames are written as ``frame_%07d.<ext>`` into the target directory.

    Args:
        quality: Value passed to ffmpeg's ``-q:v`` option.
        frame_dir: Target directory; ``self.frame_dir`` is used when None.
        overwrite: When False, nothing is extracted if the target directory
            already contains files with extension ``ext``.
        ext: Extension (without dot) of the frame image files.

    Returns:
        A status message after a successful extraction, or None when the
        extraction was skipped because frames already exist.

    Raises:
        EnvironmentError: ffmpeg cannot be found on PATH.
        RuntimeError: ffmpeg exited with a non-zero return code.
    """
    frame_dir = Path(self.frame_dir) if frame_dir is None else Path(frame_dir)

    if not frame_dir.exists():
        frame_dir.mkdir(parents=True, exist_ok=True)

    # Check for frames of the requested extension; a hard-coded "*.jpg"
    # would miss existing frames whenever ext != "jpg".
    if not overwrite and any(frame_dir.glob(f"*.{ext}")):
        print(f"Frames already extracted for {self.file.name}")
        return None

    # Ensure FFmpeg is available before preparing the command
    if not shutil.which("ffmpeg"):
        raise EnvironmentError(
            "FFmpeg could not be found. Ensure it is installed and in your PATH."
        )

    video_path = Path(self.file.path).resolve().as_posix()
    frame_path_string = frame_dir.resolve().as_posix()
    command = [
        "ffmpeg",
        "-i",
        video_path,
        "-q:v",
        str(quality),
        os.path.join(frame_path_string, f"frame_%07d.{ext}"),
    ]

    # Extract frames from the video file
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"Error extracting frames: {result.stderr}")

    # Only the in-memory flag is set here; persisting it is left to the caller.
    self.state_frames_extracted = True

    return f"Frames extracted to {frame_dir} ({frame_path_string}) with quality {quality}"
211
-
212
def delete_frames(self):
    """
    Delete frames extracted from the video file.

    Removes the whole ``self.frame_dir`` directory tree if it exists, resets
    ``state_frames_extracted`` and saves the instance.

    Returns:
        A message stating whether frames were deleted or none were found.
    """
    target_dir = Path(self.frame_dir)

    # Nothing on disk: report and leave the instance untouched
    if not target_dir.exists():
        return f"No frames to delete for {self.file.name}"

    shutil.rmtree(target_dir)
    self.state_frames_extracted = False
    self.save()
    return f"Frames deleted from {target_dir}"
224
-
225
def get_frame_path(self, n: int = 0):
    """
    Get the path to the n-th frame extracted from the video file.

    ``n`` is zero-based, while the frame files are numbered starting at 1,
    so frame ``n`` maps to ``frame_<n + 1, zero-padded to 7 digits>.jpg``
    inside ``self.frame_dir``.
    """
    file_number = n + 1  # shift to the 1-based numbering used in file names
    file_name = f"frame_{file_number:07d}.jpg"
    return Path(self.frame_dir) / file_name
235
-
236
def get_frame_paths(self):
    """
    Return the paths of all files in ``self.frame_dir`` ordered by frame index.

    The index is the integer after the first underscore of the file stem
    (e.g. ``frame_0000012.jpg`` -> 12).

    Returns:
        None when ``state_frames_extracted`` is falsy; otherwise a tuple of
        paths sorted ascending by index. The tuple is empty when the
        directory contains no files.

    Raises:
        IndexError / ValueError: a file name does not follow the
            ``<prefix>_<number>`` pattern.
    """
    if not self.state_frames_extracted:
        return None

    def frame_index(path):
        return int(path.stem.split("_")[1])

    # sorted() copes with an empty directory, where unpacking zip(*[]) would fail
    return tuple(sorted(Path(self.frame_dir).glob("*"), key=frame_index))
248
-
249
def get_prediction_dir(self):
    """Return ``self.prediction_dir`` wrapped in a ``Path``."""
    prediction_dir = Path(self.prediction_dir)
    return prediction_dir
251
-
252
def get_predictions_path(self, suffix=".json"):
    """Return ``<prediction dir>/predictions`` with ``suffix`` as its extension."""
    base_path = self.get_prediction_dir() / "predictions"
    return base_path.with_suffix(suffix)
255
-
256
def get_smooth_predictions_path(self, suffix=".json"):
    """Return ``<prediction dir>/smooth_predictions`` with ``suffix`` as its extension."""
    base_path = self.get_prediction_dir() / "smooth_predictions"
    return base_path.with_suffix(suffix)
259
-
260
def get_binary_predictions_path(self, suffix=".json"):
    """Return ``<prediction dir>/binary_predictions`` with ``suffix`` as its extension."""
    base_path = self.get_prediction_dir() / "binary_predictions"
    return base_path.with_suffix(suffix)
263
-
264
def get_raw_sequences_path(self, suffix=".json"):
    """Return ``<prediction dir>/raw_sequences`` with ``suffix`` as its extension."""
    base_path = self.get_prediction_dir() / "raw_sequences"
    return base_path.with_suffix(suffix)
267
-
268
def get_filtered_sequences_path(self, suffix=".json"):
    """Return ``<prediction dir>/filtered_sequences`` with ``suffix`` as its extension."""
    base_path = self.get_prediction_dir() / "filtered_sequences"
    return base_path.with_suffix(suffix)
271
-
272
def extract_text_information(self, frame_fraction: float = 0.001):
    """
    Extract text information from the video file.

    Requires that frames have been extracted. Takes all files in
    ``self.frame_dir``, selects an evenly spaced fraction of them (at least
    one, at most all) and runs ``extract_text_from_rois`` on each selected
    frame with this instance's processor.

    Args:
        frame_fraction: Fraction of the available frames to process.

    Returns:
        None when ``state_frames_extracted`` is falsy. Otherwise a mapping
        from ROI key to the most frequent non-empty text found for that ROI
        (None if only empty texts were found). The mapping is empty when
        the frame directory contains no files.
    """
    if not self.state_frames_extracted:
        print(f"Frames not extracted for {self.file.name}")
        return None

    processor = self.processor

    # Sort by name so that "evenly spaced" refers to frame order rather than
    # the arbitrary order in which the directory is listed.
    frames = sorted(Path(self.frame_dir).glob("*"))
    n_frames = len(frames)

    # Collects every text seen per ROI before reducing to the most frequent
    rois_texts = defaultdict(list)

    if n_frames == 0:
        # Without this guard the slice step below would be 0 (ValueError)
        print(f"No frames found for {self.file.name}")
        return rois_texts

    # Clamp to [1, n_frames]; a fraction > 1 would otherwise give step 0
    n_frames_to_process = min(n_frames, max(1, int(frame_fraction * n_frames)))

    # Select evenly spaced frames
    step = n_frames // n_frames_to_process
    selected_frames = frames[::step][:n_frames_to_process]

    print(f"Processing {n_frames_to_process} frames from {self.file.name}")
    for frame_path in selected_frames:
        extracted_texts = extract_text_from_rois(frame_path, processor)
        for roi, text in extracted_texts.items():
            rois_texts[roi].append(text)

    # Reduce each ROI's list to its most frequent non-empty value
    for roi, texts in list(rois_texts.items()):
        counter = Counter(text for text in texts if text)
        rois_texts[roi] = counter.most_common(1)[0][0] if counter else None

    return rois_texts
313
-
314
def update_text_metadata(self, ocr_frame_fraction=0.001):
    """
    Build a SensitiveMeta from text extracted from the video frames.

    Calls ``extract_text_information`` with ``ocr_frame_fraction``. When
    that yields a result, a SensitiveMeta is created from it, the
    ``state_sensitive_data_retrieved`` flag is set and the instance is
    saved. When it yields None, nothing is changed or saved.

    The resulting dict depends on the ROIs defined for this processor type.
    """
    print(f"Updating metadata for {self.file.name}")
    texts = self.extract_text_information(ocr_frame_fraction)

    if texts is None:
        # No extraction result; creating a SensitiveMeta from None would fail
        # and the "retrieved" flag must not be set.
        return

    self.sensitive_meta = SensitiveMeta.create_from_dict(texts)
    self.state_sensitive_data_retrieved = True
    self.save()
323
-
324
def update_video_meta(self):
    """
    Refresh the video metadata from the file on disk.

    If a VideoMeta is already attached, its ``update_meta`` is called with
    the video path. Otherwise a new VideoMeta is created from the video,
    attached to this instance, and the instance is saved.
    """
    current_meta = self.video_meta
    source_path = Path(self.file.path)

    if current_meta is not None:
        current_meta.update_meta(source_path)
        return

    self.video_meta = VideoMeta.create_from_video(source_path)
    self.save()
335
-
336
def get_fps(self):
    """
    Return the frame rate reported by the attached video metadata.

    Missing pieces are filled in first: ``update_video_meta`` is called when
    no VideoMeta is attached, and the ffmpeg metadata is initialized from
    the video file when it is absent.
    """
    if self.video_meta is None:
        self.update_video_meta()

    meta = self.video_meta
    if meta.ffmpeg_meta is None:
        meta.initialize_ffmpeg_meta(self.file.path)

    return meta.get_fps()
@@ -1,3 +0,0 @@
1
- from .sensitive_meta import SensitiveMeta
2
- from .pdf_meta import PdfMeta, PdfType
3
- from .video_meta import VideoMeta, FFMpegMeta, VideoImportMeta
@@ -1,70 +0,0 @@
1
- from django.db import models
2
-
3
- # import endoreg_center_id from django settings
4
- from django.conf import settings
5
-
6
-
7
- # import File class
8
- from django.core.files import File
9
-
10
- # # check if endoreg_center_id is set
11
- # if not hasattr(settings, 'ENDOREG_CENTER_ID'):
12
- # ENDOREG_CENTER_ID = 9999
13
- # else:
14
- # ENDOREG_CENTER_ID = settings.ENDOREG_CENTER_ID
15
-
16
class PdfType(models.Model):
    """
    A named PDF type described by references to ``ReportReaderFlag`` objects.

    Three single flags (patient, endoscope and examiner info line) and two
    flag collections (cut-off-above and cut-off-below lines) are stored.
    NOTE(review): presumably these flags tell a report reader where the
    respective lines are found in a PDF - confirm against the reader.
    """

    name = models.CharField(max_length=255)

    # Single-flag references; deleting a flag cascades to the PdfType
    patient_info_line = models.ForeignKey(
        "ReportReaderFlag",
        related_name="pdf_type_patient_info_line",
        on_delete=models.CASCADE,
    )
    endoscope_info_line = models.ForeignKey(
        "ReportReaderFlag",
        related_name="pdf_type_endoscopy_info_line",
        on_delete=models.CASCADE,
    )
    examiner_info_line = models.ForeignKey(
        "ReportReaderFlag",
        related_name="pdf_type_examiner_info_line",
        on_delete=models.CASCADE,
    )

    # Flag collections
    cut_off_above_lines = models.ManyToManyField(
        "ReportReaderFlag",
        related_name="pdf_type_cut_off_above_lines",
    )
    cut_off_below_lines = models.ManyToManyField(
        "ReportReaderFlag",
        related_name="pdf_type_cut_off_below_lines",
    )

    def __str__(self):
        """Return the name followed by one line per configured flag (group)."""
        parts = [
            f"{self.name}",
            f"\nPatient Info Line: {self.patient_info_line.value}",
            f"\nEndoscope Info Line: {self.endoscope_info_line.value}",
            f"\nExaminer Info Line: {self.examiner_info_line.value}",
            f"\nCut Off Above Lines: {[_.value for _ in self.cut_off_above_lines.all()]}",
            f"\nCut Off Below Lines: {[_.value for _ in self.cut_off_below_lines.all()]}",
        ]
        return "".join(parts)
54
-
55
class PdfMeta(models.Model):
    """
    Metadata record for a PDF: its PdfType, a date, a time and a unique hash.
    """

    pdf_type = models.ForeignKey(PdfType, on_delete=models.CASCADE)
    date = models.DateField()
    time = models.TimeField()
    pdf_hash = models.CharField(max_length=255, unique=True)

    def __str__(self):
        """Use the PDF hash as the display string."""
        return self.pdf_hash

    @classmethod
    def create_from_file(cls, pdf_file):
        """
        Wrap ``pdf_file`` in a Django ``File``, build an instance from it,
        save it and return it.
        """
        # NOTE(review): no ``file`` field is declared on this model in the
        # visible code, and pdf_type/date/time/pdf_hash are not supplied, so
        # this call looks like it cannot succeed as written - confirm intent.
        instance = cls(file=File(pdf_file))
        instance.save()
        return instance
70
-
@@ -1,31 +0,0 @@
1
- from django.db import models
2
-
3
class SensitiveMeta(models.Model):
    """
    Optional examination and patient details: examination date, patient
    first/last name and date of birth, endoscope type and serial number.
    Every field may be blank or null.
    """

    examination_date = models.DateField(blank=True, null=True)
    patient_first_name = models.CharField(max_length=255, blank=True, null=True)
    patient_last_name = models.CharField(max_length=255, blank=True, null=True)
    patient_dob = models.DateField(blank=True, null=True)
    endoscope_type = models.CharField(max_length=255, blank=True, null=True)
    endoscope_sn = models.CharField(max_length=255, blank=True, null=True)

    @classmethod
    def _select_field_data(cls, data: dict):
        """Return the entries of ``data`` whose key names a field of this model."""
        known_names = [field.name for field in cls._meta.fields]
        return {key: value for key, value in data.items() if key in known_names}

    @classmethod
    def create_from_dict(cls, data: dict):
        """
        Create and store an instance from ``data``.

        ``data`` can contain more keys than the model has fields; the extra
        keys are ignored.
        """
        return cls.objects.create(**cls._select_field_data(data))

    def update_from_dict(self, data: dict):
        """
        Set the fields named in ``data`` on this instance and save it.

        ``data`` can contain more keys than the model has fields; the extra
        keys are ignored.
        """
        for field_name, value in self._select_field_data(data).items():
            setattr(self, field_name, value)

        self.save()

    def __str__(self):
        """Return examination date, patient name and date of birth in one line."""
        return f"SensitiveMeta: {self.examination_date} {self.patient_first_name} {self.patient_last_name} (*{self.patient_dob})"
31
-