endoreg-db 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of endoreg-db might be problematic. Click here for more details.
- endoreg_db/admin.py +90 -1
- endoreg_db/urls.py +173 -0
- endoreg_db/views.py +0 -3
- endoreg_db-0.6.0.dist-info/METADATA +151 -0
- endoreg_db-0.6.0.dist-info/RECORD +11 -0
- {endoreg_db-0.5.2.dist-info → endoreg_db-0.6.0.dist-info}/WHEEL +2 -1
- endoreg_db-0.6.0.dist-info/top_level.txt +1 -0
- endoreg_db/data/__init__.py +0 -72
- endoreg_db/data/active_model/data.yaml +0 -3
- endoreg_db/data/agl_service/data.yaml +0 -19
- endoreg_db/data/case_template/rule/00_patient_lab_sample_add_default_value.yaml +0 -167
- endoreg_db/data/case_template/rule/01_patient-set-age.yaml +0 -8
- endoreg_db/data/case_template/rule/01_patient-set-gender.yaml +0 -9
- endoreg_db/data/case_template/rule/11_create_patient_lab_sample.yaml +0 -23
- endoreg_db/data/case_template/rule/12_create-patient_medication-anticoagulation.yaml +0 -19
- endoreg_db/data/case_template/rule/13_create-patient_medication_schedule-anticoagulation.yaml +0 -19
- endoreg_db/data/case_template/rule/19_create_patient.yaml +0 -17
- endoreg_db/data/case_template/rule_type/base_types.yaml +0 -35
- endoreg_db/data/case_template/rule_value/.init +0 -0
- endoreg_db/data/case_template/rule_value_type/base_types.yaml +0 -59
- endoreg_db/data/case_template/template/base.yaml +0 -8
- endoreg_db/data/case_template/template_type/pre_endoscopy.yaml +0 -3
- endoreg_db/data/case_template/tmp/_rule_value +0 -13
- endoreg_db/data/case_template/tmp/rule/01_atrial_fibrillation.yaml +0 -21
- endoreg_db/data/case_template/tmp/rule/02_create_object.yaml +0 -10
- endoreg_db/data/case_template/tmp/template/atrial_fibrillation_low_risk.yaml +0 -7
- endoreg_db/data/center/data.yaml +0 -60
- endoreg_db/data/center_resource/green_endoscopy_dashboard_CenterResource.yaml +0 -144
- endoreg_db/data/center_waste/green_endoscopy_dashboard_CenterWaste.yaml +0 -48
- endoreg_db/data/disease/cardiovascular.yaml +0 -37
- endoreg_db/data/disease/hepatology.yaml +0 -5
- endoreg_db/data/disease/misc.yaml +0 -6
- endoreg_db/data/disease/renal.yaml +0 -5
- endoreg_db/data/disease_classification/chronic_kidney_disease.yaml +0 -6
- endoreg_db/data/disease_classification/coronary_vessel_disease.yaml +0 -6
- endoreg_db/data/disease_classification_choice/chronic_kidney_disease.yaml +0 -41
- endoreg_db/data/disease_classification_choice/coronary_vessel_disease.yaml +0 -20
- endoreg_db/data/distribution/date/patient.yaml +0 -7
- endoreg_db/data/distribution/multiple_categorical/.init +0 -0
- endoreg_db/data/distribution/numeric/.init +0 -0
- endoreg_db/data/distribution/single_categorical/patient.yaml +0 -7
- endoreg_db/data/emission_factor/green_endoscopy_dashboard_EmissionFactor.yaml +0 -132
- endoreg_db/data/endoscope_type/data.yaml +0 -11
- endoreg_db/data/endoscopy_processor/data.yaml +0 -47
- endoreg_db/data/event/cardiology.yaml +0 -28
- endoreg_db/data/event/neurology.yaml +0 -14
- endoreg_db/data/event/surgery.yaml +0 -13
- endoreg_db/data/event/thrombembolism.yaml +0 -20
- endoreg_db/data/examination/examinations/data.yaml +0 -66
- endoreg_db/data/examination/time/data.yaml +0 -48
- endoreg_db/data/examination/time-type/data.yaml +0 -8
- endoreg_db/data/examination/type/data.yaml +0 -5
- endoreg_db/data/gender/data.yaml +0 -18
- endoreg_db/data/information_source/data.yaml +0 -30
- endoreg_db/data/information_source/medication.yaml +0 -6
- endoreg_db/data/lab_value/cardiac_enzymes.yaml +0 -31
- endoreg_db/data/lab_value/coagulation.yaml +0 -49
- endoreg_db/data/lab_value/electrolytes.yaml +0 -190
- endoreg_db/data/lab_value/gastrointestinal_function.yaml +0 -121
- endoreg_db/data/lab_value/hematology.yaml +0 -169
- endoreg_db/data/lab_value/hormones.yaml +0 -53
- endoreg_db/data/lab_value/lipids.yaml +0 -44
- endoreg_db/data/lab_value/misc.yaml +0 -30
- endoreg_db/data/lab_value/renal_function.yaml +0 -11
- endoreg_db/data/label/label/data.yaml +0 -62
- endoreg_db/data/label/label-set/data.yaml +0 -18
- endoreg_db/data/label/label-type/data.yaml +0 -7
- endoreg_db/data/log_type/data.yaml +0 -57
- endoreg_db/data/material/material.yaml +0 -91
- endoreg_db/data/medication/anticoagulation.yaml +0 -65
- endoreg_db/data/medication/tah.yaml +0 -70
- endoreg_db/data/medication_indication/anticoagulation.yaml +0 -120
- endoreg_db/data/medication_indication_type/data.yaml +0 -11
- endoreg_db/data/medication_indication_type/thrombembolism.yaml +0 -41
- endoreg_db/data/medication_intake_time/base.yaml +0 -31
- endoreg_db/data/medication_schedule/apixaban.yaml +0 -95
- endoreg_db/data/medication_schedule/ass.yaml +0 -12
- endoreg_db/data/medication_schedule/enoxaparin.yaml +0 -26
- endoreg_db/data/model_type/data.yaml +0 -7
- endoreg_db/data/network_device/data.yaml +0 -29
- endoreg_db/data/network_device_type/data.yaml +0 -12
- endoreg_db/data/patient_lab_sample_type/generic.yaml +0 -6
- endoreg_db/data/pdf_type/data.yaml +0 -28
- endoreg_db/data/product/green_endoscopy_dashboard_Product.yaml +0 -66
- endoreg_db/data/product_group/green_endoscopy_dashboard_ProductGroup.yaml +0 -33
- endoreg_db/data/product_material/green_endoscopy_dashboard_ProductMaterial.yaml +0 -308
- endoreg_db/data/product_weight/green_endoscopy_dashboard_ProductWeight.yaml +0 -88
- endoreg_db/data/profession/data.yaml +0 -70
- endoreg_db/data/reference_product/green_endoscopy_dashboard_ReferenceProduct.yaml +0 -55
- endoreg_db/data/report_reader_flag/ukw-examination-generic.yaml +0 -26
- endoreg_db/data/report_reader_flag/ukw-histology-generic.yaml +0 -19
- endoreg_db/data/resource/green_endoscopy_dashboard_Resource.yaml +0 -15
- endoreg_db/data/tmp/chronic_kidney_disease.yaml +0 -0
- endoreg_db/data/tmp/congestive_heart_failure.yaml +0 -0
- endoreg_db/data/transport_route/green_endoscopy_dashboard_TransportRoute.yaml +0 -12
- endoreg_db/data/unit/concentration.yaml +0 -92
- endoreg_db/data/unit/data.yaml +0 -17
- endoreg_db/data/unit/length.yaml +0 -31
- endoreg_db/data/unit/misc.yaml +0 -20
- endoreg_db/data/unit/rate.yaml +0 -6
- endoreg_db/data/unit/time.yaml +0 -13
- endoreg_db/data/unit/volume.yaml +0 -35
- endoreg_db/data/unit/weight.yaml +0 -38
- endoreg_db/data/waste/data.yaml +0 -12
- endoreg_db/forms/__init__.py +0 -3
- endoreg_db/forms/questionnaires/__init__.py +0 -1
- endoreg_db/forms/questionnaires/tto_questionnaire.py +0 -23
- endoreg_db/forms/settings/__init__.py +0 -8
- endoreg_db/forms/unit.py +0 -6
- endoreg_db/management/commands/_load_model_template.py +0 -41
- endoreg_db/management/commands/delete_all.py +0 -18
- endoreg_db/management/commands/delete_legacy_images.py +0 -19
- endoreg_db/management/commands/delete_legacy_videos.py +0 -17
- endoreg_db/management/commands/extract_legacy_video_frames.py +0 -18
- endoreg_db/management/commands/fetch_legacy_image_dataset.py +0 -32
- endoreg_db/management/commands/fix_auth_permission.py +0 -20
- endoreg_db/management/commands/import_legacy_images.py +0 -94
- endoreg_db/management/commands/import_legacy_videos.py +0 -76
- endoreg_db/management/commands/load_active_model_data.py +0 -45
- endoreg_db/management/commands/load_ai_model_data.py +0 -45
- endoreg_db/management/commands/load_base_db_data.py +0 -136
- endoreg_db/management/commands/load_center_data.py +0 -43
- endoreg_db/management/commands/load_disease_classification_choices_data.py +0 -41
- endoreg_db/management/commands/load_disease_classification_data.py +0 -41
- endoreg_db/management/commands/load_disease_data.py +0 -40
- endoreg_db/management/commands/load_distribution_data.py +0 -66
- endoreg_db/management/commands/load_endoscope_type_data.py +0 -45
- endoreg_db/management/commands/load_endoscopy_processor_data.py +0 -45
- endoreg_db/management/commands/load_event_data.py +0 -41
- endoreg_db/management/commands/load_examination_data.py +0 -75
- endoreg_db/management/commands/load_g_play_data.py +0 -113
- endoreg_db/management/commands/load_gender_data.py +0 -44
- endoreg_db/management/commands/load_green_endoscopy_wuerzburg_data.py +0 -133
- endoreg_db/management/commands/load_information_source.py +0 -45
- endoreg_db/management/commands/load_lab_value_data.py +0 -50
- endoreg_db/management/commands/load_label_data.py +0 -67
- endoreg_db/management/commands/load_logging_data.py +0 -39
- endoreg_db/management/commands/load_medication_data.py +0 -41
- endoreg_db/management/commands/load_medication_indication_data.py +0 -63
- endoreg_db/management/commands/load_medication_indication_type_data.py +0 -41
- endoreg_db/management/commands/load_medication_intake_time_data.py +0 -41
- endoreg_db/management/commands/load_medication_schedule_data.py +0 -55
- endoreg_db/management/commands/load_network_data.py +0 -57
- endoreg_db/management/commands/load_pdf_type_data.py +0 -61
- endoreg_db/management/commands/load_profession_data.py +0 -44
- endoreg_db/management/commands/load_report_reader_flag.py +0 -46
- endoreg_db/management/commands/load_unit_data.py +0 -46
- endoreg_db/management/commands/load_user_groups.py +0 -28
- endoreg_db/management/commands/register_ai_model.py +0 -64
- endoreg_db/management/commands/reset_celery_schedule.py +0 -9
- endoreg_db/migrations/0001_initial.py +0 -1567
- endoreg_db/migrations/0002_anonymizedimagelabel_anonymousimageannotation_and_more.py +0 -55
- endoreg_db/migrations/0003_anonymousimageannotation_original_image_url_and_more.py +0 -39
- endoreg_db/migrations/0004_alter_rawpdffile_file.py +0 -20
- endoreg_db/migrations/0005_uploadedfile_alter_rawpdffile_file_anonymizedfile.py +0 -40
- endoreg_db/migrations/0006_alter_rawpdffile_file.py +0 -20
- endoreg_db/migrations/0007_networkdevicelogentry_datetime_and_more.py +0 -43
- endoreg_db/migrations/0008_networkdevicelogentry_aglnet_ip_and_more.py +0 -28
- endoreg_db/migrations/0009_alter_networkdevicelogentry_vpn_service_status.py +0 -18
- endoreg_db/migrations/0010_remove_networkdevicelogentry_hostname.py +0 -17
- endoreg_db/migrations/__init__.py +0 -0
- endoreg_db/models/__init__.py +0 -73
- endoreg_db/models/ai_model/__init__.py +0 -3
- endoreg_db/models/ai_model/active_model.py +0 -9
- endoreg_db/models/ai_model/model_meta.py +0 -24
- endoreg_db/models/ai_model/model_type.py +0 -26
- endoreg_db/models/ai_model/utils.py +0 -8
- endoreg_db/models/annotation/__init__.py +0 -3
- endoreg_db/models/annotation/anonymized_image_annotation.py +0 -60
- endoreg_db/models/annotation/binary_classification_annotation_task.py +0 -80
- endoreg_db/models/annotation/image_classification.py +0 -27
- endoreg_db/models/case_template/__init__.py +0 -6
- endoreg_db/models/case_template/case_template.py +0 -81
- endoreg_db/models/case_template/case_template_rule.py +0 -276
- endoreg_db/models/case_template/case_template_rule_value.py +0 -73
- endoreg_db/models/case_template/case_template_type.py +0 -28
- endoreg_db/models/center/__init__.py +0 -4
- endoreg_db/models/center/center.py +0 -25
- endoreg_db/models/center/center_product.py +0 -34
- endoreg_db/models/center/center_resource.py +0 -19
- endoreg_db/models/center/center_waste.py +0 -11
- endoreg_db/models/data_file/__init__.py +0 -6
- endoreg_db/models/data_file/base_classes/__init__.py +0 -2
- endoreg_db/models/data_file/base_classes/abstract_frame.py +0 -51
- endoreg_db/models/data_file/base_classes/abstract_video.py +0 -201
- endoreg_db/models/data_file/frame.py +0 -45
- endoreg_db/models/data_file/import_classes/__init__.py +0 -32
- endoreg_db/models/data_file/import_classes/processing_functions/__init__.py +0 -35
- endoreg_db/models/data_file/import_classes/processing_functions/pdf.py +0 -28
- endoreg_db/models/data_file/import_classes/processing_functions/video.py +0 -260
- endoreg_db/models/data_file/import_classes/raw_pdf.py +0 -188
- endoreg_db/models/data_file/import_classes/raw_video.py +0 -343
- endoreg_db/models/data_file/metadata/__init__.py +0 -3
- endoreg_db/models/data_file/metadata/pdf_meta.py +0 -70
- endoreg_db/models/data_file/metadata/sensitive_meta.py +0 -31
- endoreg_db/models/data_file/metadata/video_meta.py +0 -133
- endoreg_db/models/data_file/report_file.py +0 -89
- endoreg_db/models/data_file/video/__init__.py +0 -7
- endoreg_db/models/data_file/video/import_meta.py +0 -25
- endoreg_db/models/data_file/video/video.py +0 -25
- endoreg_db/models/data_file/video_segment.py +0 -107
- endoreg_db/models/disease.py +0 -56
- endoreg_db/models/emission/__init__.py +0 -1
- endoreg_db/models/emission/emission_factor.py +0 -20
- endoreg_db/models/event.py +0 -22
- endoreg_db/models/examination/__init__.py +0 -4
- endoreg_db/models/examination/examination.py +0 -26
- endoreg_db/models/examination/examination_time.py +0 -27
- endoreg_db/models/examination/examination_time_type.py +0 -24
- endoreg_db/models/examination/examination_type.py +0 -18
- endoreg_db/models/hardware/__init__.py +0 -2
- endoreg_db/models/hardware/endoscope.py +0 -44
- endoreg_db/models/hardware/endoscopy_processor.py +0 -143
- endoreg_db/models/information_source.py +0 -29
- endoreg_db/models/label/__init__.py +0 -1
- endoreg_db/models/label/label.py +0 -84
- endoreg_db/models/laboratory/__init__.py +0 -1
- endoreg_db/models/laboratory/lab_value.py +0 -102
- endoreg_db/models/legacy_data/__init__.py +0 -3
- endoreg_db/models/legacy_data/image.py +0 -34
- endoreg_db/models/logging/__init__.py +0 -4
- endoreg_db/models/logging/agl_service.py +0 -19
- endoreg_db/models/logging/base.py +0 -22
- endoreg_db/models/logging/log_type.py +0 -23
- endoreg_db/models/logging/network_device.py +0 -27
- endoreg_db/models/medication/__init__.py +0 -1
- endoreg_db/models/medication/medication.py +0 -148
- endoreg_db/models/network/__init__.py +0 -3
- endoreg_db/models/network/agl_service.py +0 -38
- endoreg_db/models/network/network_device.py +0 -53
- endoreg_db/models/network/network_device_type.py +0 -23
- endoreg_db/models/other/__init__.py +0 -5
- endoreg_db/models/other/distribution.py +0 -215
- endoreg_db/models/other/material.py +0 -16
- endoreg_db/models/other/resource.py +0 -18
- endoreg_db/models/other/transport_route.py +0 -21
- endoreg_db/models/other/waste.py +0 -20
- endoreg_db/models/patient_examination/__init__.py +0 -35
- endoreg_db/models/permissions/__init__.py +0 -44
- endoreg_db/models/persons/__init__.py +0 -7
- endoreg_db/models/persons/examiner/__init__.py +0 -2
- endoreg_db/models/persons/examiner/examiner.py +0 -16
- endoreg_db/models/persons/examiner/examiner_type.py +0 -2
- endoreg_db/models/persons/first_name.py +0 -18
- endoreg_db/models/persons/gender.py +0 -22
- endoreg_db/models/persons/last_name.py +0 -20
- endoreg_db/models/persons/patient/__init__.py +0 -8
- endoreg_db/models/persons/patient/case/__init__.py +0 -0
- endoreg_db/models/persons/patient/case/case.py +0 -30
- endoreg_db/models/persons/patient/patient.py +0 -216
- endoreg_db/models/persons/patient/patient_disease.py +0 -16
- endoreg_db/models/persons/patient/patient_event.py +0 -22
- endoreg_db/models/persons/patient/patient_lab_sample.py +0 -106
- endoreg_db/models/persons/patient/patient_lab_value.py +0 -176
- endoreg_db/models/persons/patient/patient_medication.py +0 -44
- endoreg_db/models/persons/patient/patient_medication_schedule.py +0 -28
- endoreg_db/models/persons/person.py +0 -31
- endoreg_db/models/persons/portal_user_information.py +0 -27
- endoreg_db/models/prediction/__init__.py +0 -2
- endoreg_db/models/prediction/image_classification.py +0 -37
- endoreg_db/models/prediction/video_prediction_meta.py +0 -244
- endoreg_db/models/product/__init__.py +0 -5
- endoreg_db/models/product/product.py +0 -97
- endoreg_db/models/product/product_group.py +0 -19
- endoreg_db/models/product/product_material.py +0 -24
- endoreg_db/models/product/product_weight.py +0 -26
- endoreg_db/models/product/reference_product.py +0 -99
- endoreg_db/models/questionnaires/__init__.py +0 -114
- endoreg_db/models/quiz/__init__.py +0 -2
- endoreg_db/models/quiz/quiz_answer.py +0 -41
- endoreg_db/models/quiz/quiz_question.py +0 -54
- endoreg_db/models/report_reader/__init__.py +0 -2
- endoreg_db/models/report_reader/report_reader_config.py +0 -53
- endoreg_db/models/report_reader/report_reader_flag.py +0 -20
- endoreg_db/models/rules/__init__.py +0 -5
- endoreg_db/models/rules/rule.py +0 -24
- endoreg_db/models/rules/rule_applicator.py +0 -224
- endoreg_db/models/rules/rule_attribute_dtype.py +0 -19
- endoreg_db/models/rules/rule_type.py +0 -22
- endoreg_db/models/rules/ruleset.py +0 -19
- endoreg_db/models/unit.py +0 -22
- endoreg_db/queries/__init__.py +0 -5
- endoreg_db/queries/annotations/__init__.py +0 -3
- endoreg_db/queries/annotations/legacy.py +0 -158
- endoreg_db/queries/get/__init__.py +0 -6
- endoreg_db/queries/get/annotation.py +0 -0
- endoreg_db/queries/get/center.py +0 -42
- endoreg_db/queries/get/model.py +0 -13
- endoreg_db/queries/get/patient.py +0 -14
- endoreg_db/queries/get/patient_examination.py +0 -20
- endoreg_db/queries/get/prediction.py +0 -0
- endoreg_db/queries/get/report_file.py +0 -33
- endoreg_db/queries/get/video.py +0 -31
- endoreg_db/queries/get/video_import_meta.py +0 -0
- endoreg_db/queries/get/video_prediction_meta.py +0 -0
- endoreg_db/queries/sanity/__init_.py +0 -0
- endoreg_db/serializers/__init__.py +0 -10
- endoreg_db/serializers/ai_model.py +0 -19
- endoreg_db/serializers/annotation.py +0 -17
- endoreg_db/serializers/center.py +0 -11
- endoreg_db/serializers/examination.py +0 -33
- endoreg_db/serializers/frame.py +0 -13
- endoreg_db/serializers/hardware.py +0 -21
- endoreg_db/serializers/label.py +0 -22
- endoreg_db/serializers/patient.py +0 -10
- endoreg_db/serializers/prediction.py +0 -15
- endoreg_db/serializers/report_file.py +0 -7
- endoreg_db/serializers/video.py +0 -27
- endoreg_db/utils/__init__.py +0 -1
- endoreg_db/utils/cropping.py +0 -29
- endoreg_db/utils/dataloader.py +0 -92
- endoreg_db/utils/file_operations.py +0 -30
- endoreg_db/utils/hashs.py +0 -34
- endoreg_db/utils/legacy_ocr.py +0 -201
- endoreg_db/utils/ocr.py +0 -190
- endoreg_db/utils/uuid.py +0 -4
- endoreg_db/utils/video_metadata.py +0 -87
- endoreg_db-0.5.2.dist-info/METADATA +0 -27
- endoreg_db-0.5.2.dist-info/RECORD +0 -319
- {endoreg_db-0.5.2.dist-info/licenses → endoreg_db-0.6.0.dist-info}/LICENSE +0 -0
endoreg_db/utils/dataloader.py
DELETED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import yaml
|
|
3
|
-
from django.core.exceptions import ObjectDoesNotExist
|
|
4
|
-
|
|
5
|
-
def load_model_data_from_yaml(command, model_name, metadata, verbose):
    """
    Load every YAML file of a model's data directory.

    The ``.yaml`` files located directly inside ``metadata["dir"]`` are
    processed in sorted filename order; each parsed document is passed on to
    ``load_data_with_foreign_keys``.

    Args:
        command: Command object for stdout writing.
        model_name: Name of the model being loaded (only used in the output).
        metadata: Mapping providing the keys ``"model"``, ``"dir"``,
            ``"foreign_keys"`` and ``"foreign_key_models"``.
        verbose: Boolean indicating whether to print verbose output.
    """
    if verbose:
        command.stdout.write(f"Start loading {model_name}")
    model = metadata["model"]
    dir_path = metadata["dir"]
    foreign_keys = metadata["foreign_keys"]
    foreign_key_models = metadata["foreign_key_models"]

    # Sorted so that files are always loaded in the same (filename) order.
    yaml_names = sorted(f for f in os.listdir(dir_path) if f.endswith('.yaml'))
    for yaml_name in yaml_names:
        # Separate names for the filename and the open handle (the handle used
        # to rebind the loop variable), and an explicit encoding so parsing
        # does not depend on the platform's default locale.
        with open(os.path.join(dir_path, yaml_name), 'r', encoding='utf-8') as stream:
            yaml_data = yaml.safe_load(stream)

        load_data_with_foreign_keys(command, model, yaml_data, foreign_keys, foreign_key_models, verbose)
|
|
30
|
-
|
|
31
|
-
def load_data_with_foreign_keys(command, model, yaml_data, foreign_keys, foreign_key_models, verbose):
    """
    Load data handling foreign keys and many-to-many relationships.

    For every entry, the ``fields`` mapping is turned into one ``model``
    object: entries with a ``name`` go through ``update_or_create`` keyed on
    that name, entries without one through ``get_or_create`` on all fields.
    Note that ``name`` and the foreign-key fields are popped from the entry's
    ``fields`` mapping, i.e. ``yaml_data`` is modified in place.

    Args:
        command: Command object for stdout writing.
        model: The Django model for the data.
        yaml_data: Data loaded from YAML (a list of entries). ``None``, as
            produced by an empty YAML document, is treated as no entries.
        foreign_keys: List of foreign keys.
        foreign_key_models: Corresponding models for the foreign keys.
        verbose: Boolean indicating whether to print verbose output.
    """
    for entry in yaml_data or []:
        fields = entry.get('fields', {})
        name = fields.pop('name', None)
        # Many-to-many values can only be set once the main object exists,
        # so they are collected here and applied at the end.
        m2m_relationships = {}

        # Handle foreign keys and many-to-many relationships
        for fk_field, fk_model in zip(foreign_keys, foreign_key_models):
            target_keys = fields.pop(fk_field, None)

            if target_keys is None:
                if verbose:
                    command.stdout.write(command.style.WARNING(f"Foreign key {fk_field} not found in fields"))
                continue  # Skip if no foreign key provided

            if isinstance(target_keys, list):
                # A list is taken to be a many-to-many relationship; missing
                # related objects are created by name.
                related_objects = []
                for key in target_keys:
                    related, created = fk_model.objects.get_or_create(name=key)
                    if created and verbose:
                        command.stdout.write(command.style.SUCCESS(f"Created {fk_model.__name__} {key}"))
                    related_objects.append(related)
                m2m_relationships[fk_field] = related_objects
            else:
                # Single foreign key: must already exist, otherwise the field
                # is left out of the object.
                try:
                    related = fk_model.objects.get_by_natural_key(target_keys)
                except ObjectDoesNotExist:
                    if verbose:
                        command.stdout.write(command.style.WARNING(f"{fk_model.__name__} with key {target_keys} not found"))
                    continue
                fields[fk_field] = related

        # Create or update the main object
        if name is None:
            obj, created = model.objects.get_or_create(**fields)
        else:
            obj, created = model.objects.update_or_create(defaults=fields, name=name)
        if created and verbose:
            command.stdout.write(command.style.SUCCESS(f'Created {model.__name__} {name}'))
        elif verbose:
            # Also printed when update_or_create updated an existing object.
            command.stdout.write(command.style.WARNING(f'Skipped {model.__name__} {name}, already exists'))

        # Set many-to-many relationships
        for field_name, related_objs in m2m_relationships.items():
            getattr(obj, field_name).set(related_objs)
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import shutil
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from endoreg_db.utils.uuid import get_uuid
|
|
4
|
-
|
|
5
|
-
def get_uuid_filename(file:Path) -> tuple[str, str]:
    """
    Build a uuid-based filename that keeps the extension of ``file``.

    Nothing is touched on disk. The result is the pair
    ``(new_file_name, uuid)``, where ``uuid`` is the value returned by
    ``get_uuid()`` and ``new_file_name`` is that value followed by
    ``file.suffix``.
    """
    extension = file.suffix
    generated_uuid = get_uuid()
    return f"{generated_uuid}{extension}", generated_uuid
|
|
15
|
-
|
|
16
|
-
def rename_file_uuid(old_file:Path):
    """
    Move a file to a uuid-based name in the same directory.

    The new name is the value returned by ``get_uuid()`` followed by the
    original file extension. Returns the pair ``(new_file, uuid)`` with
    ``new_file`` being the new path.
    """
    extension = old_file.suffix
    generated_uuid = get_uuid()

    # Same directory, new name.
    destination = old_file.with_name(f"{generated_uuid}{extension}")

    source_path = old_file.resolve().as_posix()
    destination_path = destination.resolve().as_posix()
    shutil.move(source_path, destination_path)

    return destination, generated_uuid
|
endoreg_db/utils/hashs.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import hashlib
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
def get_video_hash(video_path):
    """
    Get the SHA-256 hash of a video file.

    The file is read in fixed-size chunks, so memory use does not grow with
    the size of the video.

    Args:
        video_path: Path of the file to hash (anything ``open`` accepts).

    Returns:
        str: Hexadecimal SHA-256 digest of the file contents (64 characters).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hash_object = hashlib.sha256()
    with open(video_path, 'rb') as f:
        # 1 MiB per read; an empty read marks the end of the file.
        while chunk := f.read(1024 * 1024):
            hash_object.update(chunk)
    return hash_object.hexdigest()
|
|
17
|
-
|
|
18
|
-
def get_pdf_hash(pdf_path:Path):
    """
    Get the SHA-256 hash of a pdf file.

    The file is read in fixed-size chunks instead of being loaded into memory
    as a whole.

    Args:
        pdf_path: Path of the file to hash.

    Returns:
        str: Hexadecimal SHA-256 digest of the file contents (64 characters).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # usedforsecurity=False: the digest serves as a content identifier here.
    hash_object = hashlib.sha256(usedforsecurity=False)
    with open(pdf_path, 'rb') as f:
        # 1 MiB per read; an empty read marks the end of the file.
        while chunk := f.read(1024 * 1024):
            hash_object.update(chunk)
    return hash_object.hexdigest()
|
endoreg_db/utils/legacy_ocr.py
DELETED
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
import pytesseract
|
|
2
|
-
# import cv2
|
|
3
|
-
from endoreg_db.models import EndoscopyProcessor
|
|
4
|
-
import os
|
|
5
|
-
from collections import Counter
|
|
6
|
-
from tempfile import TemporaryDirectory
|
|
7
|
-
import re
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
from typing import Dict, List
|
|
10
|
-
import numpy as np
|
|
11
|
-
|
|
12
|
-
N_FRAMES_MEAN_OCR = 2
|
|
13
|
-
|
|
14
|
-
# Helper function to process date strings
|
|
15
|
-
def process_date_text(date_text):
    """
    Parse a date out of OCR text, based on the digits it contains.

    Every non-digit character is dropped first. Eight remaining digits are
    read as ``ddmmyyyy``, fourteen as ``ddmmyyyyHHMMSS`` (the time part is
    discarded).

    Args:
        date_text (str): A string of text that represents a date.

    Returns:
        datetime.date: The parsed date. If the digits have one of the two
        supported lengths but do not form a valid date, the placeholder date
        1900-01-01 is returned. For any other number of digits the result is
        None.
    """
    digits_only = re.sub(r'\D', '', date_text)

    formats_by_length = {8: "%d%m%Y", 14: "%d%m%Y%H%M%S"}
    date_format = formats_by_length.get(len(digits_only))
    if date_format is None:
        return None

    try:
        return datetime.strptime(digits_only, date_format).date()
    except ValueError:
        # Right length, but not a real date: fall back to 1/1/1900.
        return datetime.strptime("01011900", "%d%m%Y").date()
|
|
37
|
-
|
|
38
|
-
# Helper function to process patient names
|
|
39
|
-
def process_name_text(name_text):
    """
    Clean a name: drop digits and ASCII punctuation, capitalize each word.

    Whitespace is kept as the word separator; runs of whitespace collapse to
    a single space and leading/trailing whitespace is removed. Each word is
    passed through ``str.capitalize`` (first letter upper case, rest lower
    case).

    Args:
        name_text (str): Raw text, e.g. as returned by OCR.

    Returns:
        str: The cleaned name; an empty string if nothing is left.
    """
    # Digits plus every ASCII punctuation character. "-", "[" and "]" are
    # escaped explicitly: an unescaped "]" ends the character class, which
    # previously meant that digits and punctuation were never stripped.
    name = re.sub(r'[0-9!"#$%&\'()*+,\-./:;<=>?@\[\\\]^_`{|}~]+', '', name_text)
    # capitalize first letter of each word
    return ' '.join(word.capitalize() for word in name.split())
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# Helper function to process endoscope type text
|
|
50
|
-
def process_general_text(endoscope_text):
    """
    Normalize the whitespace of a piece of text.

    Leading and trailing whitespace is removed and every inner run of
    whitespace is replaced by a single space.
    """
    tokens = endoscope_text.split()
    return ' '.join(tokens)
|
|
55
|
-
|
|
56
|
-
def roi_values_valid(roi):
    """
    Check if all values in an ROI dictionary are valid (>=0).

    Args:
        roi (dict): ROI description; only its values are inspected.

    Returns:
        bool: True if every value is >= 0 (also for an empty dictionary),
        False as soon as one value is negative.
    """
    # Generator instead of a list: stops at the first negative value.
    return all(value >= 0 for value in roi.values())
|
|
61
|
-
|
|
62
|
-
# Function to extract text from ROIs
|
|
63
|
-
def extract_text_from_rois(image_path, processor:EndoscopyProcessor):
    """
    Run OCR on the regions of interest (ROIs) of a single image.

    For each of the six ROIs provided by ``processor`` (examination date,
    patient first/last name, patient date of birth, endoscope type and
    endoscope serial number) the region is cropped, preprocessed, passed to
    Tesseract and the recognized text is cleaned by the post-processing
    function belonging to that ROI. ROIs containing a negative value are
    skipped.

    Args:
        image_path (str): The path to the image file.
        processor (EndoscopyProcessor): Provides the ``get_roi_*`` methods;
            each returned ROI is indexed with the keys ``x``, ``y``,
            ``width`` and ``height``.

    Returns:
        dict: ROI name -> post-processed text, for every ROI that was not
        skipped.
    """
    # NOTE(review): `cv2` is used throughout this function, but the only
    # `import cv2` visible in this module is commented out - confirm that the
    # name is provided some other way.
    image = cv2.imread(image_path)

    # (key in the result, ROI getter, post-processing function)
    roi_specs = (
        ('examination_date', processor.get_roi_examination_date, process_date_text),
        ("patient_first_name", processor.get_roi_patient_first_name, process_name_text),
        ('patient_last_name', processor.get_roi_patient_last_name, process_name_text),
        ('patient_dob', processor.get_roi_patient_dob, process_date_text),
        ('endoscope_type', processor.get_roi_endoscope_type, process_general_text),
        ('endoscope_sn', processor.get_roi_endoscopy_sn, process_general_text),
    )

    # Tesseract options: page segmentation mode 10 and a character whitelist
    # (digits, ASCII letters, hyphen, German umlauts and sharp s).
    ocr_config = '--psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-üöäÜÖÄß'
    # 2x2 kernel used to thicken the pixelated glyphs before OCR.
    dilation_kernel = np.ones((2,2), np.uint8)

    extracted_texts = {}
    for roi_name, get_roi, post_process in roi_specs:
        roi = get_roi()
        if not roi_values_valid(roi):
            # No usable coordinates for this ROI.
            continue

        left, top = roi['x'], roi['y']
        right, bottom = left + roi['width'], top + roi['height']
        cropped = image[top:bottom, left:right]

        # Grayscale -> inverted -> binarized with Otsu's method -> dilated.
        inverted = cv2.bitwise_not(cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY))
        _, binarized = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        prepared = cv2.dilate(binarized, dilation_kernel, iterations=1)

        raw_text = pytesseract.image_to_string(prepared, config=ocr_config).strip()
        extracted_texts[roi_name] = post_process(raw_text)

    return extracted_texts
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def get_most_frequent_values(rois_texts: Dict[str, List[str]]) -> Dict[str, str]:
    """
    Pick, for every ROI, the text that was collected most often.

    Falsy texts (``None``, empty strings) are not counted.

    Args:
        rois_texts: A dictionary where the keys are the names of the ROIs and the values are lists of texts.

    Returns:
        A dictionary with the same keys; each value is the most frequent text
        of that ROI, or None if the ROI has no countable text.
    """
    most_frequent = {}
    for roi_name, texts in rois_texts.items():
        counts = Counter(text for text in texts if text)
        if counts:
            winner, _occurrences = counts.most_common(1)[0]
        else:
            winner = None
        most_frequent[roi_name] = winner
    return most_frequent
|
|
153
|
-
|
|
154
|
-
def process_video(video_path, processor, frame_interval=1000):
    """
    Extract the most frequent OCR text per ROI from sampled frames of a video.

    Frames are read sequentially. Every ``frame_interval``-th frame (starting
    with frame 0) is written to a temporary JPEG file and handed to
    ``extract_text_from_rois``; the results are collected per ROI. Reading
    stops when the video yields no further frame or once
    ``N_FRAMES_MEAN_OCR`` frames have been sampled.

    Args:
        video_path (str): The path to the video file to process.
        processor: Object whose ``get_rois()`` returns a mapping keyed by ROI
            name; it is also passed on to ``extract_text_from_rois``.
        frame_interval (int): Sample every n-th frame. Defaults to 1000, the
            stride that was previously hard-coded.

    Returns:
        dict: The most frequent text value extracted for each ROI (None for
        ROIs without any non-empty text).

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    rois_texts = {roi_name: [] for roi_name in processor.get_rois().keys()}
    frames_for_mean_extraction = 0
    frame_number = 0

    # Sampled frames go through a temporary directory because
    # extract_text_from_rois takes an image path.
    with TemporaryDirectory() as temp_dir:
        video = cv2.VideoCapture(video_path)
        try:
            while True:
                success, frame = video.read()
                if not success:
                    break

                if frame_number % frame_interval == 0:
                    frame_path = os.path.join(temp_dir, f"frame_{frame_number}.jpg")
                    cv2.imwrite(frame_path, frame)  # Save the frame as a JPEG file

                    extracted_texts = extract_text_from_rois(frame_path, processor)
                    for key, text in extracted_texts.items():
                        rois_texts[key].append(text)
                    frames_for_mean_extraction += 1

                frame_number += 1

                if frames_for_mean_extraction >= N_FRAMES_MEAN_OCR:
                    break
        finally:
            # Release the capture even when OCR or file writing fails.
            video.release()

    return get_most_frequent_values(rois_texts)
|
|
201
|
-
|
endoreg_db/utils/ocr.py
DELETED
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
import pytesseract
|
|
2
|
-
from PIL import Image, ImageOps, ImageFilter
|
|
3
|
-
import os
|
|
4
|
-
from collections import Counter
|
|
5
|
-
from tempfile import TemporaryDirectory
|
|
6
|
-
import re
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from typing import Dict, List
|
|
9
|
-
import numpy as np
|
|
10
|
-
from endoreg_db.utils.cropping import crop_and_insert
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
N_FRAMES_MEAN_OCR = 2
|
|
15
|
-
|
|
16
|
-
# Helper function to process date strings
|
|
17
|
-
def process_date_text(date_text):
    """
    Parse OCR text that represents a date into a ``datetime.date``.

    All non-digit characters are discarded first. The remaining digits are
    read as ``ddmmyyyy`` (8 digits) or ``ddmmyyyyHHMMSS`` (14 digits, the time
    part is dropped).

    Args:
        date_text (str): Raw text read from a date ROI.

    Returns:
        datetime.date or None:
            * the parsed date on success;
            * the placeholder date 1900-01-01 when the digit count fits but
              the digits do not form a valid date;
            * None when the input is not a string or does not contain
              exactly 8 or 14 digits.
    """
    if not isinstance(date_text, str):
        return None

    # Remove any non-digit characters
    digits = re.sub(r'\D', '', date_text)

    formats_by_length = {8: "%d%m%Y", 14: "%d%m%Y%H%M%S"}
    date_format = formats_by_length.get(len(digits))
    if date_format is None:
        return None

    try:
        return datetime.strptime(digits, date_format).date()
    except ValueError:
        # Keep the established placeholder for unparseable digit strings.
        return datetime(1900, 1, 1).date()
|
|
39
|
-
|
|
40
|
-
# Helper function to process patient names
|
|
41
|
-
def process_name_text(name_text):
    """
    Remove all digits, ASCII punctuation and whitespace from a name and
    capitalize the result.

    The previous pattern left ``]`` unescaped inside the character class, so
    the class ended early and the rest was parsed as an alternation; in
    practice almost nothing was removed. The pattern is now built with
    ``re.escape`` so every listed character is matched literally.

    Args:
        name_text (str): Raw text read from a name ROI.

    Returns:
        str: The cleaned name with its first letter upper-cased and the
        remaining letters lower-cased; an empty string if nothing is left.
    """
    # NOTE(review): hyphens are punctuation and therefore dropped, so
    # double-barrelled names are merged ("hans-peter" -> "Hanspeter").
    # This follows the documented contract - confirm it is desired.
    punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    unwanted = f"[0-9\\s{re.escape(punctuation)}]+"
    name = re.sub(unwanted, '', name_text)
    # No whitespace is left at this point, so the name is a single word.
    return name.capitalize()
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Helper function to process endoscope type text
|
|
52
|
-
def process_general_text(endoscope_text):
    """
    Normalise the whitespace of free text read from an endoscope ROI.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace is collapsed into a single space.
    """
    tokens = endoscope_text.split()
    return ' '.join(tokens)
|
|
57
|
-
|
|
58
|
-
def roi_values_valid(roi):
    """
    Check if all values in an ROI dictionary are valid (>=0).

    Args:
        roi (dict): ROI description; only its values are inspected.

    Returns:
        bool: True if every value is greater than or equal to zero (also for
        an empty dictionary), False as soon as one negative value is found.
    """
    # A generator lets all() stop at the first invalid value instead of
    # building a throwaway list first.
    return all(value >= 0 for value in roi.values())
|
|
63
|
-
|
|
64
|
-
# Function to extract text from ROIs
|
|
65
|
-
def extract_text_from_rois(image_path, processor):
    """
    Extracts text from regions of interest (ROIs) in an image using OCR.

    The image is converted to grayscale and inverted once. For every ROI
    whose values pass ``roi_values_valid`` the region is isolated with
    ``crop_and_insert``, read with pytesseract and post-processed. ROIs with
    invalid values are skipped and do not appear in the result.

    Args:
        image_path (str): The path to the image file.
        processor: Object providing the ``get_roi_*`` getters used below;
            each getter returns a dictionary with the keys ``x``, ``y``,
            ``width`` and ``height``.

    Returns:
        dict: The post-processed text for each ROI that had valid values,
        keyed by ROI name.
    """
    # NOTE(review): Tesseract documents page segmentation mode 10 as
    # "treat the image as a single character" - confirm this is intended
    # for multi-character fields.
    config = '--psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-üöäÜÖÄß'

    # ROI name, getter for the ROI dictionary, post-processing function
    rois_with_postprocessing = [
        ('examination_date', processor.get_roi_examination_date, process_date_text),
        ("patient_first_name", processor.get_roi_patient_first_name, process_name_text),
        ('patient_last_name', processor.get_roi_patient_last_name, process_name_text),
        ('patient_dob', processor.get_roi_patient_dob, process_date_text),
        ('endoscope_type', processor.get_roi_endoscope_type, process_general_text),
        ('endoscope_sn', processor.get_roi_endoscopy_sn, process_general_text),
    ]

    # Close the file as soon as the derived in-memory image exists, so the
    # caller can delete the (temporary) file without an open handle on it.
    with Image.open(image_path) as image:
        # Grayscale, then inverted colors
        inverted = ImageOps.invert(image.convert('L'))

    extracted_texts = {}
    for roi_name, roi_function, post_process in rois_with_postprocessing:
        roi = roi_function()
        if not roi_values_valid(roi):
            continue

        x, y, w, h = roi['x'], roi['y'], roi['width'], roi['height']

        # NOTE(review): height is passed before width - verify against the
        # signature of crop_and_insert.
        roi_image = crop_and_insert(inverted, x, y, h, w)

        # Use pytesseract to do OCR on the preprocessed ROI
        text = pytesseract.image_to_string(roi_image, config=config).strip()

        extracted_texts[roi_name] = post_process(text)

    return extracted_texts
|
|
128
|
-
|
|
129
|
-
def get_most_frequent_values(rois_texts: Dict[str, List[str]]) -> Dict[str, str]:
    """
    Reduce the texts collected per ROI to the single most frequent one.

    Args:
        rois_texts: A dictionary whose keys are ROI names and whose values
            are the lists of texts gathered for that ROI. Falsy texts are
            not counted.

    Returns:
        A dictionary with one entry per ROI name: the most frequent
        non-empty text, or None if the ROI has none.
    """
    result = {}
    for roi_name in rois_texts:
        counts = Counter(filter(None, rois_texts[roi_name]))
        ranked = counts.most_common(1)
        result[roi_name] = ranked[0][0] if ranked else None
    return result
|
|
144
|
-
|
|
145
|
-
def process_video(video_path, processor, frame_interval=1000):
    """
    Processes a video file by extracting text from regions of interest (ROIs)
    in sampled frames.

    Frames are read one after another. Every ``frame_interval``-th frame
    (starting with frame 0) is saved as a temporary JPEG and passed to
    ``extract_text_from_rois``. Reading stops when no further frame can be
    read or once ``N_FRAMES_MEAN_OCR`` frames have been sampled.

    Args:
        video_path (str): The path to the video file to process.
        processor: Object whose ``get_rois()`` returns a mapping keyed by ROI
            name; it is also passed on to ``extract_text_from_rois``.
        frame_interval (int): Sample every n-th frame. Defaults to 1000, the
            stride that was previously hard-coded.

    Returns:
        dict: A dictionary containing the most frequent text values extracted
        from each ROI (None for ROIs without any non-empty text).

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # This module never imported cv2 although the function relies on it,
    # which made every call fail with NameError. Import it where it is used.
    import cv2

    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    rois_texts = {roi_name: [] for roi_name in processor.get_rois().keys()}
    sampled_frames = 0
    frame_number = 0

    # Create a temporary directory to store frames
    with TemporaryDirectory() as temp_dir:
        video = cv2.VideoCapture(video_path)
        try:
            while True:
                success, frame = video.read()
                if not success:
                    break

                if frame_number % frame_interval == 0:
                    frame_path = os.path.join(temp_dir, f"frame_{frame_number}.jpg")
                    cv2.imwrite(frame_path, frame)  # Save the frame as a JPEG file

                    # Store the extracted text from each ROI
                    for key, text in extract_text_from_rois(frame_path, processor).items():
                        rois_texts[key].append(text)
                    sampled_frames += 1

                frame_number += 1

                if sampled_frames >= N_FRAMES_MEAN_OCR:
                    break
        finally:
            # Release the video capture object even if OCR raised.
            video.release()

    # Get the most frequent values for each ROI
    return get_most_frequent_values(rois_texts)
|
|
190
|
-
|
endoreg_db/utils/uuid.py
DELETED
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
# import cv2
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
import datetime
|
|
4
|
-
|
|
5
|
-
def _extract_metadata_from_filename(filepath:Path, time_format = None): # deprecated
    """
    Extracts metadata from a video filename.

    The filename (without its suffix) is split at underscores and must
    consist of exactly four parts:
    ``<examination date>_<last name>_<first name>_<date of birth>``.
    If it does not, every entry of the result is None.

    Args:
        filepath (Path): The path to the video file. A plain string path is
            accepted as well.
        time_format (str, optional): The format of the dates in the filename.
            Defaults to None, which means "%d.%m.%Y".

    Returns:
        dict: A dictionary containing the extracted metadata. Dates are
        returned as "%Y-%m-%d" strings, or None if they cannot be parsed
        with ``time_format``.

    Example return value:
        {
            'examination_date': '2021-03-03',
            'patient_last_name': 'Doe',
            'patient_first_name': 'John',
            'patient_dob': '1990-01-01'
        }
    """
    # get filename without suffix
    filename = Path(filepath).stem

    if not time_format:
        time_format = "%d.%m.%Y"

    parts = [part.strip() for part in filename.split("_")]

    if len(parts) == 4:
        examination_date, last_name, first_name, birthdate = parts
    else:
        examination_date = last_name = first_name = birthdate = None

    def _to_iso(raw_date):
        """Reformat a date string to "%Y-%m-%d"; None if it cannot be parsed."""
        try:
            parsed = datetime.datetime.strptime(raw_date, time_format).date()
            return parsed.strftime("%Y-%m-%d")
        except (TypeError, ValueError):
            # TypeError: raw_date is None; ValueError: text does not match.
            return None

    return {
        'examination_date': _to_iso(examination_date),
        'patient_last_name': last_name,
        'patient_first_name': first_name,
        'patient_dob': _to_iso(birthdate),
    }
|
|
57
|
-
|
|
58
|
-
def _get_video_metadata(file_path): # Deprecated
    """
    Returns the framerate, dimensions, and duration of a video file.

    Parameters:
        file_path (str): The path to the video file.

    Returns:
        tuple: A tuple containing the framerate (float), dimensions (tuple of
        ints: width, height), and duration (float) of the video. The duration
        is the frame count divided by the framerate, and 0.0 when no positive
        framerate is reported.
    """
    # The module-level cv2 import is commented out, so the name was undefined
    # here. Import lazily to keep the module importable without OpenCV.
    import cv2

    # Open the video file
    video = cv2.VideoCapture(file_path)
    try:
        # Get the video framerate
        fps = video.get(cv2.CAP_PROP_FPS)

        # Get the video dimensions
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the video file
        video.release()

    dimensions = (width, height)

    # Guard against a framerate of 0, which would otherwise raise
    # ZeroDivisionError.
    duration = frame_count / fps if fps > 0 else 0.0

    # Return the metadata
    return fps, dimensions, duration
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: endoreg-db
|
|
3
|
-
Version: 0.5.2
|
|
4
|
-
Summary: EndoReg Db Django App
|
|
5
|
-
Requires-Python: >=3.11
|
|
6
|
-
Requires-Dist: agl-report-reader>=0.4.0
|
|
7
|
-
Requires-Dist: django-bootstrap5>=24.3
|
|
8
|
-
Requires-Dist: django>=5.1.3
|
|
9
|
-
Requires-Dist: djangorestframework>=3.15.2
|
|
10
|
-
Requires-Dist: ffmpeg-python>=0.2.0
|
|
11
|
-
Requires-Dist: opencv-python>=4.10.0.84
|
|
12
|
-
Requires-Dist: pandas>=2.2.3
|
|
13
|
-
Requires-Dist: pillow>=11.0.0
|
|
14
|
-
Requires-Dist: pytesseract>=0.3.13
|
|
15
|
-
Requires-Dist: pyyaml>=6.0.2
|
|
16
|
-
Requires-Dist: scipy>=1.14.1
|
|
17
|
-
Requires-Dist: tqdm>=4.67.0
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
|
|
20
|
-
# endoreg-db
|
|
21
|
-
endoreg-db
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
## Poetry add pypi token
|
|
25
|
-
poetry config pypi-token.pypi your-api-token
|
|
26
|
-
|
|
27
|
-
|